# Source code for modeci_mdf.interfaces.onnx.importer

"""
Code for importing ONNX models into MDF.
"""
import typing

import onnx

from onnx import (
    ModelProto,
    TensorProto,
    GraphProto,
    AttributeProto,
    numpy_helper,
    shape_inference,
)
from onnx.defs import get_schema

from modeci_mdf.mdf import *


def id_to_port(id: str):
    """
    Turn unique ONNX output and input value names into valid MDF input and output port names.

    Args:
        id: The ONNX value name. It is passed through ``str()`` first, so
            non-string identifiers are accepted as well.

    Returns:
        The name with every "." and "::" replaced by "_", and prefixed with "_"
        when it would otherwise start with a digit. An empty name is returned
        unchanged (ONNX uses the empty string for an omitted optional value).
    """

    # Get rid of double colon in id names, this causes issues with execution engine.
    new_name = str(id).replace(".", "_").replace("::", "_")

    # If the first character is a digit, precede with an underscore so this can never be
    # interpreted as number down the line. Slicing (rather than indexing) keeps an empty
    # name from raising IndexError.
    if new_name[:1].isdigit():
        new_name = "_" + new_name

    return new_name


def get_shape_params(shape: onnx.TensorShapeProto) -> typing.Tuple:
    """
    Small helper function to extract a tuple from the TensorShapeProto. These objects
    can contain both integer dimensions and parameter dimensions that are variable, like
    'batch_size'.

    Args:
        shape: The ONNX shape proto to process.

    Returns:
        A tuple that can contain both integers and strings for parameter dimensions.
        A shape with no dimensions is reported as ``(1,)``.
    """
    # A dimension with a non-empty dim_param is symbolic; otherwise use its integer dim_value.
    dims = tuple(d.dim_param if d.dim_param != "" else d.dim_value for d in shape.dim)

    # If shape is empty tuple, its a scalar, make it size 1
    return dims if dims else (1,)


def get_onnx_attribute(a):
    """
    Extract the value stored in an ONNX attribute.

    Args:
        a: The ONNX attribute to read; it is handed to
            ``onnx.helper.get_attribute_value``.

    Returns:
        The attribute value. A ``TensorProto`` value is converted to a (nested)
        Python list so that it can be stored as JSON; any other value is
        returned as the helper produced it.
    """

    # Use the helpers to get the appropriate value
    val = onnx.helper.get_attribute_value(a)

    # get_attribute_value() can return TensorProto's, lets convert them to a list for JSON
    # FIXME: This begs the question, is JSON a good format for storing large tensors (nope)
    if isinstance(val, TensorProto):
        return numpy_helper.to_array(val).tolist()

    return val


def onnx_node_to_mdf(
    node: typing.Union[onnx.NodeProto, onnx.ValueInfoProto],
    onnx_initializer: typing.Dict[str, typing.Dict[str, typing.Any]],
) -> Node:
    """
    Construct an MDF node (and function) from an ONNX NodeProto or ValueInfoProto

    Args:
        node: The ONNX node to use to form the MDF node. Can be a node from the model or
            a ValueInfoProto specifying an input or output.
        onnx_initializer: A specification of values in the graph that ONNX has
            marked as initializer's. This dict is keyed on the name of the parameter,
            the value is another dict with three entries; shape, type, and value.

    Returns:
        The equivalent MDF node for the ONNX node passed in as argument.

    Raises:
        NotImplementedError: If ``node`` is a ValueInfoProto; conversion of
            graph input/output values is not implemented.
        TypeError: If ``node`` is neither a NodeProto nor a ValueInfoProto.
    """

    if isinstance(node, onnx.ValueInfoProto):
        # FIXME: Should this be necessary? ONNX treats input and output nodes as simple
        # named values. A pass-through node (one InputPort carrying the value's shape
        # feeding one OutputPort named after the value) was sketched here previously
        # but never enabled.
        raise NotImplementedError()

    if not isinstance(node, onnx.NodeProto):
        # Fail with a clear message instead of falling through to an unbound local.
        raise TypeError(f"Unsupported ONNX object type: {type(node).__name__}")

    # Create and MDF node with parameters
    # FIXME: We need to preserve type info somewhere
    params_dict = {}
    for attr in node.attribute:
        val = get_onnx_attribute(attr)

        # For any attributes that are sub-graphs, we need to recurse
        if isinstance(val, GraphProto):
            val = onnx_to_mdf(val, onnx_initializer=onnx_initializer)

        params_dict[attr.name] = val

    # If we have we have value constants that feed into this node. Make them parameters
    # instead of input ports
    non_constant_inputs = []
    func_args = {}
    schema_inputs = None
    for inp_i, inp in enumerate(node.input):

        # ONNX marks an omitted optional input with an empty name; there is no value
        # to wire up, so skip it. The positional index still advances so the
        # remaining inputs keep lining up with the schema's formal arguments.
        if not inp:
            continue

        # Get the name of the formal argument that corresponds to this input.
        # We need to go to the schema for this. Look it up once per node, and only
        # when the node actually has an input to name.
        # FIXME: We need to make sure we are going the correct schema here ... yuck!
        if schema_inputs is None:
            schema_inputs = get_schema(node.op_type).inputs

        try:
            arg_name = schema_inputs[inp_i].name
        except IndexError:
            # More actual inputs than formal arguments in the schema; fall back to a
            # name derived from the input itself.
            arg_name = f"arg_{inp}"

        if inp in onnx_initializer and "value" in onnx_initializer[inp]:
            params_dict[arg_name] = onnx_initializer[inp]["value"]
            func_args[arg_name] = arg_name
        else:
            non_constant_inputs.append(inp)
            func_args[arg_name] = id_to_port(inp)

    # FIXME: parameters must be set or we get JSON serialization error later
    mdf_node = Node(id=node.name)

    for p, val in params_dict.items():
        # A converted sub-graph is wrapped in a single-entry dict keyed on its id.
        if isinstance(val, Graph):
            val = {"graph_%s" % val.id: val}
        mdf_node.parameters.append(Parameter(id=p, value=val))

    # Add the function
    # FIXME: There is probably more stuff we need to preserve for ONNX Ops
    func = Parameter(id=node.name, function=f"onnx::{node.op_type}", args=func_args)
    mdf_node.parameters.append(func)

    # Recreate inputs and outputs of ONNX node as InputPorts and OutputPorts
    for inp in non_constant_inputs:
        param_info = onnx_initializer.get(inp, None)
        shape = param_info["shape"] if param_info else None
        ip = InputPort(id=id_to_port(inp), shape=shape, type="float")
        mdf_node.input_ports.append(ip)

    for out in node.output:
        op = OutputPort(id=id_to_port(out), value=func.id)
        mdf_node.output_ports.append(op)

    return mdf_node


def onnx_to_mdf(
    onnx_model: typing.Union[ModelProto, GraphProto],
    onnx_initializer: typing.Dict[str, typing.Dict[str, typing.Any]] = None,
):
    """
    Convert a loaded ONNX model into a MDF model.

    Args:
        onnx_model: The ONNX model to convert. Typically, this is the result of a call to onnx.load()
        onnx_initializer: A specification of values in the graph that ONNX has
            marked as initializer's. This dict is keyed on the name of the parameter,
            the value is another dict with three entries; shape, type, and value.

    Returns:
        An MDF description of the ONNX model: a Model wrapping one Graph when a
        ModelProto was passed in, otherwise the Graph itself.
    """

    if onnx_initializer is None:
        onnx_initializer = {}

    is_model = isinstance(onnx_model, ModelProto)

    if is_model:

        # Do shape inference on the model so we can get shapes of intermediate outputs
        # FIXME: This function has side-effects, it probably shouldn't
        try:
            onnx_model = shape_inference.infer_shapes(onnx_model)
        except RuntimeError:
            # Best effort: carry on without inferred shapes.
            pass

        graph = onnx_model.graph

    else:
        graph = onnx_model

    if hasattr(graph, "initializer"):
        # Parameters that have been initialized with values.
        # FIXME: We need a cleaner way to extract this info.
        graph_values = {}
        for t in graph.initializer:
            t_np = numpy_helper.to_array(t)
            graph_values[t.name] = {"shape": t_np.shape, "type": str(t_np.dtype)}

        # And the input and intermediate node shapes as well
        for vinfo in (*graph.input, *graph.value_info):
            vshape = get_shape_params(vinfo.type.tensor_type.shape)

            try:
                vtype = onnx.helper.printable_type(vinfo.type)
            except AssertionError:
                # Couldn't extract type
                vtype = None

            graph_values[vinfo.name] = {"shape": vshape, "type": vtype}

        # Entries found in this graph take precedence over the ones passed in.
        onnx_initializer = {**onnx_initializer, **graph_values}

    # Finally, some nodes are constants, extract the values and drop the nodes.
    # They will be removed in the MDF and passed as named parameters to the Node
    constants = {}
    onnx_nodes = []
    for onnx_node in graph.node:
        if onnx_node.op_type != "Constant":
            onnx_nodes.append(onnx_node)
            continue

        raw = onnx.helper.get_attribute_value(onnx_node.attribute[0])
        if isinstance(raw, TensorProto):
            # Read shape and dtype from the array *before* flattening it to a list
            # for JSON; the list itself no longer carries either.
            arr = numpy_helper.to_array(raw)
            info = {"shape": arr.shape, "type": str(arr.dtype), "value": arr.tolist()}
        else:
            info = {
                "shape": raw.shape if hasattr(raw, "shape") else (1,),
                "type": str(raw.dtype) if hasattr(raw, "dtype") else str(type(raw)),
                "value": raw,
            }
        constants[onnx_node.output[0]] = info

    # Add constants to the initializer dict
    onnx_initializer = {**onnx_initializer, **constants}

    mod_graph = Graph(id=graph.name)

    # Construct the equivalent nodes in MDF
    mdf_nodes = [
        onnx_node_to_mdf(node=node, onnx_initializer=onnx_initializer)
        for node in onnx_nodes
    ]

    mod_graph.nodes.extend(mdf_nodes)

    # Index every input port by its id once, in node order then port order, so each
    # output can find its receivers without rescanning the whole graph.
    receivers_by_port = {}
    for receiver_node in mod_graph.nodes:
        for receiver_port in receiver_node.input_ports:
            receivers_by_port.setdefault(receiver_port.id, []).append(
                (receiver_node, receiver_port)
            )

    # Construct the edges, we will do this by going through all the nodes.
    # onnx_node_to_mdf creates one output port per ONNX output, in the same order.
    for mdf_node in mod_graph.nodes:
        for out_port in mdf_node.output_ports:
            out_port_id = out_port.id

            # Make an edge for each receiver of this output port
            for receiver_node, receiver_port in receivers_by_port.get(out_port_id, ()):
                edge = Edge(
                    id=f"{mdf_node.id}.{out_port_id}_{receiver_node.id}.{receiver_port.id}",
                    sender=mdf_node.id,
                    sender_port=out_port_id,
                    receiver=receiver_node.id,
                    receiver_port=receiver_port.id,
                )

                mod_graph.edges.append(edge)

    # If they passed an ONNX model, wrap the graph in a MDF model
    if is_model:
        mod = Model(id="ONNX Model")
        mod.graphs.append(mod_graph)
        return mod

    return mod_graph


def find_subgraphs(
    graph: onnx.GraphProto,
    graph_dict: typing.Optional[typing.Dict[str, GraphProto]] = None,
) -> typing.Dict[str, GraphProto]:
    """
    Recurse through an ONNX graph and find all subgraphs.

    Args:
        graph: The graph to search.
        graph_dict: Insert graphs we find into this dict, which is modified in place.
            Keys are built from the parent node name and the attribute index
            (``<node>_attr<i>``, plus ``_g<j>`` for a list-of-graphs attribute).
            If None, initialize to an empty dict.

    Returns:
        All the subgraphs found in the graph, including nested ones.
    """

    if graph_dict is None:
        graph_dict = {}

    for node in graph.node:
        for ai, attr in enumerate(node.attribute):
            if attr.type == AttributeProto.GRAPH:
                # Single sub-graph attribute: record it, then search inside it.
                subgraph = onnx.helper.get_attribute_value(attr)
                graph_dict[f"{node.name}_attr{ai}"] = subgraph
                find_subgraphs(subgraph, graph_dict)
            elif attr.type == AttributeProto.GRAPHS:
                # List of sub-graphs: record and search each one.
                subgraphs = onnx.helper.get_attribute_value(attr)
                for gi, subgraph in enumerate(subgraphs):
                    graph_dict[f"{node.name}_attr{ai}_g{gi}"] = subgraph
                    find_subgraphs(subgraph, graph_dict)

    return graph_dict


def convert_file(input_file: str) -> None:
    """
    Simple converter from ONNX to MDF. Takes in ONNX files and generates MDF JSON/YAML files.

    The model is loaded and run through ``onnx.checker.check_model`` before conversion.

    Args:
        input_file: The input file path to the ONNX file. Output files are generated in same
            directory with -mdf.json and -mdf.yaml extensions.

    Returns:
        None
    """

    import os

    # Strip the input's extension and append "-mdf" to form the output base name.
    base_path, _ = os.path.splitext(input_file)
    out_filename = f"{base_path}-mdf"

    onnx_model = onnx.load(input_file)
    onnx.checker.check_model(onnx_model)

    mdf_model = onnx_to_mdf(onnx_model)
    mdf_model.to_json_file(f"{out_filename}.json")
    mdf_model.to_yaml_file(f"{out_filename}.yaml")


# The ONNX operator names and their categories below were taken from
# https://raw.githubusercontent.com/lutzroeder/netron/main/source/onnx-metadata.json

# The colors used for each category were taken from
# https://github.com/lutzroeder/netron/blob/b7a0be975f852c2c2fbce4a6fce69a37819b3601/source/grapher.css#L27


# Maps an ONNX operator name to its display category.
# NOTE: get_category_of_onnx_node scans these keys in insertion order and returns the
# category of the first key that is contained in the queried name, so the order of
# entries matters wherever one key is a substring of another (e.g. "Conv" is listed
# before "ConvTranspose" and "Transpose").
new_dict = {
    "AveragePool": "Pool",
    "BatchNormalization": "Normalization",
    "Clip": "Activation",
    "Concat": "Tensor",
    "Constant": "Constant",
    "Conv": "Layer",
    "ConvInteger": "Layer",
    "ConvTranspose": "Layer",
    "Dropout": "Dropout",
    "Elu": "Activation",
    "Flatten": "Shape",
    "GRU": "Layer",
    "Gather": "Transform",
    "Gemm": "Layer",
    "GlobalAveragePool": "Pool",
    "GlobalLpPool": "Pool",
    "GlobalMaxPool": "Pool",
    "HardSigmoid": "Activation",
    "InstanceNormalization": "Normalization",
    "LRN": "Normalization",
    "LSTM": "Layer",
    "LeakyRelu": "Activation",
    "LogSoftmax": "Activation",
    "LpNormalization": "Normalization",
    "LpPool": "Pool",
    "MaxPool": "Pool",
    "MaxRoiPool": "Pool",
    "PRelu": "Activation",
    "Pad": "Tensor",
    "RNN": "Layer",
    "Relu": "Activation",
    "Reshape": "Shape",
    "Selu": "Activation",
    "Sigmoid": "Activation",
    "Slice": "Tensor",
    "Softmax": "Activation",
    "Softplus": "Activation",
    "Softsign": "Activation",
    "Split": "Tensor",
    "Squeeze": "Transform",
    "Tanh": "Activation",
    "ThresholdedRelu": "Activation",
    "Tile": "Shape",
    "Transpose": "Transform",
    "Unsqueeze": "Transform",
    "Upsample": "Data",
    "FusedConv": "Layer",
}

# Maps a category name (the values of new_dict) to the color string that
# get_color_for_onnx_category returns under the "color" key.
# NOTE(review): each value looks like three space-separated components, presumably a
# Graphviz-style "H S V" triple -- confirm against the consumer of this color.
# NOTE(review): there is no entry for the "Constant" category used in new_dict, so that
# category has no color here.
color_dict = {
    "Activation": ".4 .2 .1",
    "Layer": ".2 .3 .5",
    "Pool": ".2 .3 .2",
    "Normalization": ".2 .3 .3",
    "Tensor": ".3 .3 .2",
    "Transform": ".2 .3 .3",
    "Shape": ".4 .3 .3",
    "Dropout": ".3 .3 .4",
    "Data": ".3 .3 .3",
}


def get_category_of_onnx_node(entry):
    """
    Look up the display category for an ONNX operator.

    The keys of ``new_dict`` are tried in insertion order and the first key that
    is contained in ``entry`` (``key in entry``) decides the result.

    Args:
        entry: The text to search for a known operator name.

    Returns:
        The category of the first matching key, or None when no key matches.
    """
    return next(
        (category for op_name, category in new_dict.items() if op_name in entry),
        None,
    )


def get_color_for_onnx_category(shape):
    """
    Look up the color assigned to a category.

    Args:
        shape: The category name; it must equal a key of ``color_dict`` exactly.

    Returns:
        A dict ``{"color": <color string>}`` for a known category, otherwise None.
    """
    try:
        color = color_dict[shape]
    except (KeyError, TypeError):
        # Unknown category, or a value that cannot be a dict key at all: no color.
        return None

    return {"color": color}


def main():
    """
    Command-line entry point.

    Reads one positional argument, the path of an ONNX file, from the command
    line and passes it to ``convert_file``.
    """

    import argparse

    # Build the parser once; creating a second bare ArgumentParser afterwards would
    # throw away the description shown by --help.
    parser = argparse.ArgumentParser(
        description="Simple converter from ONNX to MDF. "
        "Takes in ONNX files and generates MDF JSON/YAML"
    )
    parser.add_argument(
        "input_file",
        type=str,
        help="An input ONNX file. "
        "Output files are generated in same directory "
        "with -mdf.json and -mdf.yaml extensions.",
    )

    args = parser.parse_args()

    convert_file(args.input_file)


if __name__ == "__main__":
    main()