# Source: onnxruntime/transformers/machine_info.py (vendored copy from
# group-wbl/.venv/lib/python3.13/site-packages/onnxruntime/transformers/)
# Snapshot: 2026-01-09 09:48:03 +08:00 — 229 lines, 7.1 KiB, Python

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
# It is used to dump machine information for Notebooks
import argparse
import json
import logging
import platform
from os import environ
import cpuinfo
import psutil
from py3nvml.py3nvml import (
NVMLError,
nvmlDeviceGetCount,
nvmlDeviceGetHandleByIndex,
nvmlDeviceGetMemoryInfo,
nvmlDeviceGetName,
nvmlInit,
nvmlShutdown,
nvmlSystemGetDriverVersion,
)
class MachineInfo:
    """Collects machine environment information (CPU, GPU, memory, OS,
    Python, and versions of related ML packages) for benchmark notebooks.

    The collected data is stored in ``self.machine_info`` (a dict), or
    ``None`` if collection failed.
    """

    def __init__(self, silent=False, logger=None):
        """
        Args:
            silent: suppress error logging for expected failures
                (e.g. no NVIDIA GPU / driver present).
            logger: optional logger; when None a default one is configured.
        """
        self.silent = silent
        if logger is None:
            logging.basicConfig(
                format="%(asctime)s - %(name)s - %(levelname)s: %(message)s",
                level=logging.INFO,
            )
            self.logger = logging.getLogger(__name__)
        else:
            self.logger = logger

        # Best effort: keep machine_info as None on failure so callers can
        # still serialize the result without special-casing exceptions.
        self.machine_info = None
        try:
            self.machine_info = self.get_machine_info()
        except Exception:
            self.logger.exception("Exception in getting machine info.")
            self.machine_info = None

    def get_machine_info(self) -> dict:
        """Get machine info in metric format."""
        gpu_info = self.get_gpu_info_by_nvml()
        cpu_info = cpuinfo.get_cpu_info()
        machine_info = {
            "gpu": gpu_info,
            "cpu": self.get_cpu_info(),
            "memory": self.get_memory_info(),
            "os": platform.platform(),
            "python": self._try_get(cpu_info, ["python_version"]),
            "packages": self.get_related_packages(),
            "onnxruntime": self.get_onnxruntime_info(),
            "pytorch": self.get_pytorch_info(),
            "tensorflow": self.get_tensorflow_info(),
        }
        return machine_info

    def get_memory_info(self) -> dict:
        """Get total and available physical memory, in bytes."""
        mem = psutil.virtual_memory()
        return {"total": mem.total, "available": mem.available}

    def _try_get(self, cpu_info: dict, names: list) -> str:
        """Return the value of the first key in *names* present in *cpu_info*.

        List/tuple values are joined with commas; returns "" when no key
        is present.
        """
        for name in names:
            if name in cpu_info:
                value = cpu_info[name]
                if isinstance(value, (list, tuple)):
                    return ",".join([str(i) for i in value])
                return value
        return ""

    def get_cpu_info(self) -> dict:
        """Get CPU info."""
        cpu_info = cpuinfo.get_cpu_info()
        return {
            # "brand" (older cpuinfo) vs "brand_raw" (newer cpuinfo).
            "brand": self._try_get(cpu_info, ["brand", "brand_raw"]),
            "cores": psutil.cpu_count(logical=False),
            "logical_cores": psutil.cpu_count(logical=True),
            "hz": self._try_get(cpu_info, ["hz_actual"]),
            "l2_cache": self._try_get(cpu_info, ["l2_cache_size"]),
            "flags": self._try_get(cpu_info, ["flags"]),
            "processor": platform.uname().processor,
        }

    def get_gpu_info_by_nvml(self) -> dict:
        """Get GPU info using NVML; returns None when NVML is unavailable."""
        gpu_info_list = []
        driver_version = None
        try:
            nvmlInit()
            try:
                driver_version = nvmlSystemGetDriverVersion()
                device_count = nvmlDeviceGetCount()
                for i in range(device_count):
                    handle = nvmlDeviceGetHandleByIndex(i)
                    info = nvmlDeviceGetMemoryInfo(handle)
                    gpu_info = {}
                    gpu_info["memory_total"] = info.total
                    gpu_info["memory_available"] = info.free
                    gpu_info["name"] = nvmlDeviceGetName(handle)
                    gpu_info_list.append(gpu_info)
            finally:
                # Always release the NVML session once nvmlInit succeeded,
                # even if a per-device query raised (was leaked before).
                nvmlShutdown()
        except NVMLError as error:
            if not self.silent:
                self.logger.error("Error fetching GPU information using nvml: %s", error)
            return None
        result = {"driver_version": driver_version, "devices": gpu_info_list}

        if "CUDA_VISIBLE_DEVICES" in environ:
            result["cuda_visible"] = environ["CUDA_VISIBLE_DEVICES"]
        return result

    def get_related_packages(self) -> dict:
        """Map installed package name to version for ML-related packages.

        Note: despite the historical name, this returns a dict, not a list.
        """
        import pkg_resources  # noqa: PLC0415

        installed_packages = pkg_resources.working_set
        # Set for O(1) membership tests while scanning the working set.
        related_packages = {
            "onnxruntime-gpu",
            "onnxruntime",
            "onnx",
            "transformers",
            "protobuf",
            "sympy",
            "torch",
            "tensorflow",
            "flatbuffers",
            "numpy",
            "onnxconverter-common",
        }
        return {i.key: i.version for i in installed_packages if i.key in related_packages}

    def get_onnxruntime_info(self) -> dict:
        """Get onnxruntime version and GPU support; None when unavailable."""
        try:
            import onnxruntime  # noqa: PLC0415

            return {
                "version": onnxruntime.__version__,
                "support_gpu": "CUDAExecutionProvider" in onnxruntime.get_available_providers(),
            }
        except ImportError as error:
            if not self.silent:
                self.logger.exception(error)
            return None
        except Exception as exception:
            if not self.silent:
                # Do not pass extra positional args: they are treated as
                # %-format arguments for the message and can raise.
                self.logger.exception(exception)
            return None

    def get_pytorch_info(self) -> dict:
        """Get torch version, GPU availability and CUDA version; None on failure."""
        try:
            import torch  # noqa: PLC0415

            return {
                "version": torch.__version__,
                "support_gpu": torch.cuda.is_available(),
                "cuda": torch.version.cuda,
            }
        except ImportError as error:
            if not self.silent:
                self.logger.exception(error)
            return None
        except Exception as exception:
            if not self.silent:
                # See get_onnxruntime_info: no extra positional args.
                self.logger.exception(exception)
            return None

    def get_tensorflow_info(self) -> dict:
        """Get tensorflow version and CUDA build flag; None on failure."""
        try:
            import tensorflow as tf  # noqa: PLC0415

            return {
                "version": tf.version.VERSION,
                "git_version": tf.version.GIT_VERSION,
                "support_gpu": tf.test.is_built_with_cuda(),
            }
        except ImportError as error:
            if not self.silent:
                self.logger.exception(error)
            return None
        # ModuleNotFoundError is a subclass of ImportError, so the old
        # second handler was unreachable; match the sibling methods and
        # catch any other failure instead.
        except Exception as exception:
            if not self.silent:
                self.logger.exception(exception)
            return None
def parse_arguments():
    """Parse the command-line arguments for this script."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--silent",
        required=False,
        action="store_true",
        help="Do not print error message",
    )
    arg_parser.set_defaults(silent=False)
    return arg_parser.parse_args()
def get_machine_info(silent=True) -> str:
    """Return the full machine info as a pretty-printed JSON string."""
    return json.dumps(MachineInfo(silent).machine_info, indent=2)
def get_device_info(silent=True) -> str:
    """Return only the hardware portion (gpu/cpu/memory) as a JSON string."""
    info = MachineInfo(silent).machine_info
    if info:
        wanted = ("gpu", "cpu", "memory")
        info = {key: value for key, value in info.items() if key in wanted}
    return json.dumps(info, indent=2)
if __name__ == "__main__":
    # Script entry point: dump full machine info to stdout.
    cli_args = parse_arguments()
    print(get_machine_info(cli_args.silent))