166 lines
7.1 KiB
Python
166 lines
7.1 KiB
Python
# -------------------------------------------------------------------------
|
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
# Licensed under the MIT License. See License.txt in the project root for
|
|
# license information.
|
|
# --------------------------------------------------------------------------
|
|
"""Provide entry point to preprocess ONNX model especially for QNN."""
|
|
|
|
import argparse
|
|
import pathlib
|
|
|
|
import onnx
|
|
|
|
from onnxruntime.quantization.execution_providers import qnn
|
|
|
|
|
|
def _parse_arguments():
|
|
"""Parse cmdline arguments."""
|
|
parser = argparse.ArgumentParser(description="Arguments for QNN model preprocess.")
|
|
|
|
parser.add_argument("--input_model_path", "-i", required=True, help="Path to the input ONNX model.")
|
|
parser.add_argument("--output_model_path", "-o", required=True, help="Path to the output ONNX model.")
|
|
|
|
# Save preprocessed model with external data.
|
|
parser.add_argument(
|
|
"--save_as_external_data",
|
|
action="store_true",
|
|
help="Whether the output model would be saved with external data.",
|
|
)
|
|
parser.add_argument(
|
|
"--all_tensors_to_one_file",
|
|
action="store_true",
|
|
help="Whether to save all external data in one file or save each tensor to a file named with the tensor name.",
|
|
)
|
|
parser.add_argument(
|
|
"--external_data_location",
|
|
help="Filename of the external file where all tensors are saved. The path is relative to the model path.",
|
|
)
|
|
parser.add_argument(
|
|
"--external_data_size_threshold",
|
|
default=1024,
|
|
type=int,
|
|
help="Tensors with data size larger than this threshold are converted to external data.",
|
|
)
|
|
parser.add_argument(
|
|
"--external_data_convert_attribute",
|
|
action="store_true",
|
|
help="Whether to save all tensors, including attribute tensors, to external data.",
|
|
)
|
|
|
|
# Preprocess options.
|
|
parser.add_argument(
|
|
"--fuse_layernorm",
|
|
action="store_true",
|
|
help="Whether to fuse matched sequences into LayerNormalization nodes if possible.",
|
|
)
|
|
|
|
# I/O layouts.
|
|
parser.add_argument(
|
|
"--inputs_to_make_channel_last",
|
|
nargs="+",
|
|
default=None,
|
|
help="List of graph input names to be transposed into channel-last.",
|
|
)
|
|
|
|
parser.add_argument(
|
|
"--outputs_to_make_channel_last",
|
|
nargs="+",
|
|
default=None,
|
|
help="List of graph output names to be transposed into channel-last.",
|
|
)
|
|
|
|
# Fix dynamic input shapes.
|
|
parser.add_argument(
|
|
"--dynamic_input_shapes",
|
|
nargs=2,
|
|
action="append",
|
|
type=str,
|
|
default=None,
|
|
help="Model input name and desired static shape in comma seprated format, for example: 'input' 1,3,256,256",
|
|
)
|
|
|
|
# Exclude initializer from input
|
|
parser.add_argument(
|
|
"--exclude_initializer_from_input",
|
|
action="store_true",
|
|
help="Whether to exclude initializer from input if model.ir_version >= 4",
|
|
)
|
|
|
|
return parser.parse_args()
|
|
|
|
|
|
def qnn_preprocess_model(
    model_input: str | pathlib.Path | onnx.ModelProto,
    model_output: str | pathlib.Path,
    fuse_layernorm: bool = False,
    save_as_external_data: bool = False,
    all_tensors_to_one_file: bool = False,
    external_data_location: str | None = None,
    external_data_size_threshold: int = 1024,
    external_data_convert_attribute: bool = False,
    inputs_to_make_channel_last: list[str] | None = None,
    outputs_to_make_channel_last: list[str] | None = None,
    dynamic_input_shapes: list[tuple[str, str]] | None = None,
    exclude_initializer_from_input: bool = False,
) -> bool:
    """Preprocess an ONNX model for QNN.

    This is a thin wrapper: every argument is forwarded unchanged to ``qnn.qnn_preprocess_model`` and its
    result is returned as-is.

    Args:
        model_input: A path or ONNX ModelProto specifying the model to be preprocessed.
        model_output: A path specifying where the preprocessed model is to be saved.
        fuse_layernorm: A bool specifying whether to fuse the matched sequence into a single
            LayerNormalization node. Defaults to False.
        save_as_external_data: A bool specifying whether to save the model with external data.
            Defaults to False.
        all_tensors_to_one_file: A bool specifying whether to save all external data in one file or to save
            each tensor to a file named with the tensor name. Effective only when `save_as_external_data`
            is True. Defaults to False.
        external_data_location: A str specifying where to save the external data. The path is relative to
            the model path. Effective only when `save_as_external_data` is True. Defaults to the model name.
        external_data_size_threshold: An int specifying the data-size threshold above which tensors are
            saved as external data. Effective only when `save_as_external_data` is True. Defaults to 1024.
        external_data_convert_attribute: A bool specifying whether to save all tensors, including
            attributes, as external data. Effective only when `save_as_external_data` is True.
            Defaults to False.
        inputs_to_make_channel_last: A list of strs specifying graph input names to be transposed into
            channel-last. Defaults to None.
        outputs_to_make_channel_last: A list of strs specifying graph output names to be transposed into
            channel-last. Defaults to None.
        dynamic_input_shapes: A list of tuples, each pairing a model input name with its static shape in
            comma separated format, for example: [('input', '1,3,256,256')]. Defaults to None.
        exclude_initializer_from_input: A bool specifying whether to exclude initializers from the inputs.
            Defaults to False.

    Returns:
        A bool indicating whether the model is modified.
    """
    # Options controlling how (and whether) tensors are written out as external data.
    external_data_options = {
        "save_as_external_data": save_as_external_data,
        "all_tensors_to_one_file": all_tensors_to_one_file,
        "external_data_location": external_data_location,
        "external_data_size_threshold": external_data_size_threshold,
        "external_data_convert_attribute": external_data_convert_attribute,
    }

    # Options describing graph input/output handling.
    graph_io_options = {
        "inputs_to_make_channel_last": inputs_to_make_channel_last,
        "outputs_to_make_channel_last": outputs_to_make_channel_last,
        "dynamic_input_shapes": dynamic_input_shapes,
        "exclude_initializer_from_input": exclude_initializer_from_input,
    }

    return qnn.qnn_preprocess_model(
        model_input,
        model_output,
        fuse_layernorm=fuse_layernorm,
        **external_data_options,
        **graph_io_options,
    )
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: parse the command line and forward every option to qnn_preprocess_model.
    cli = _parse_arguments()

    # Names shared between the parsed namespace attributes and qnn_preprocess_model's keyword parameters.
    option_names = (
        "fuse_layernorm",
        "save_as_external_data",
        "all_tensors_to_one_file",
        "external_data_location",
        "external_data_size_threshold",
        "external_data_convert_attribute",
        "inputs_to_make_channel_last",
        "outputs_to_make_channel_last",
        "dynamic_input_shapes",
        "exclude_initializer_from_input",
    )
    preprocess_options = {name: getattr(cli, name) for name in option_names}

    qnn_preprocess_model(cli.input_model_path, cli.output_model_path, **preprocess_options)
|