ONNX-LRE
C++ API documentation
Loading...
Searching...
No Matches
OnnxLre::LatentRuntimeEngine Class Reference

The LatentRuntimeEngine class provides a C++ interface to load and run ONNX models using ONNX Runtime. More...

#include <onnx_lre.hpp>

Public Member Functions

 LatentRuntimeEngine (const std::string &modelPath, const Options &config=Options())
 Constructs the inference engine, loading the ONNX model at modelPath with the given options.
 ~LatentRuntimeEngine ()
 Releases all allocated resources.
size_t getNumberOfInputs () const
 Returns the number of input tensors required by the model.
size_t getNumberOfOutputs () const
 Returns the number of output tensors produced by the model.
const std::vector< const char * > & getInputNames () const
 Retrieves the names of all model input nodes.
const std::vector< const char * > & getOutputNames () const
 Retrieves the names of all model output nodes.
std::vector< std::string > getInputDTypes () const
 Gets the data types of all input tensors as strings.
std::vector< std::string > getOutputDTypes () const
 Gets the data types of all output tensors as strings.
const std::vector< std::vector< int64_t > > & getInputShapes () const
 Retrieves the dimensional shapes of all input tensors.
const std::vector< std::vector< int64_t > > & getOutputShapes () const
 Retrieves the dimensional shapes of all output tensors.
std::string getExecutionProvider () const
 Returns the currently active execution provider.
void infer (const std::vector< DLManagedTensor * > &t_input_data_vec)
 Performs inference using DLPack tensor inputs.
void infer (const std::vector< Ort::Value > &t_input_data_vec)
 Performs inference using ONNX Runtime tensor inputs.
void infer (const std::vector< void * > &t_input_data_vec, const std::vector< int64_t * > shape, const std::string device)
 Performs inference using raw memory pointers and shapes.
Ort::Value makeORTTensor (void *t_input_data_vec, const int64_t *shape, int input_index, const std::string &device)
 Creates an ONNX Runtime tensor from raw memory.
std::vector< DLManagedTensor * > getOutput ()
 Retrieves inference results as DLPack tensors.
std::vector< Ort::Value > getOutputOrt ()
 Retrieves and transfers ownership of inference results as ONNX Runtime tensors.
void setCPUOutput (bool use_cpu)
 Controls output tensor placement between device and host memory.
bool isCPUOutput ()
 Checks the current output tensor memory placement policy.
std::string getMetaValue (std::string key)
 Retrieves model metadata by key.
std::string getVersion ()
 Retrieves the ONNX LRE library version at runtime.

Private Member Functions

void autoSelectExecutionProvider (ExecutionProvider currentProvider)
 Auto-selects the most appropriate execution provider based on system capabilities.
void initLRE (std::vector< unsigned char > model)
 Initializes the model for inference.
void configureTensorRTProvider ()
 Configures TensorRT provider options.
void configureCUDAProvider ()
 Configures CUDA provider options.
void allocateIO (bool onlyOutput)
void hasDynamicInputsOutputs (Ort::Session &session)
void fetchInputNodeInfo ()
 Fetches and stores input node information.
void fetchOutputNodeInfo ()
 Fetches and stores output node information.
double getAverageInferenceTimeMs () const
std::string generateModelInit ()
std::string generateModelInferenceEvent ()

Private Attributes

Options config
Ort::Env env
 ONNX Runtime environment.
Ort::SessionOptions sessionOptions
 Session options for ONNX Runtime.
Ort::Session session {nullptr}
 The ONNX Runtime session for model inference.
Ort::IoBinding io_binding {nullptr}
std::string model_path
 Path to the ONNX model file.
bool isModelLoaded = false
 Flag indicating if the model is successfully loaded.
bool gpuInput = false
 Flag indicating if the input tensors should be on GPU (true for CUDA and TensorRT)
bool gpuOutput = false
 Flag indicating if the output tensors would be on GPU (true for CUDA and TensorRT)
bool graphQuantized = false
 Flag indicating if the model is quantized.
bool dynamicGraph = false
 Flag indicating if the model is dynamic (condition and loop)
bool dynamicInputs = false
 Flag indicating if the model has dynamic input shape.
bool dynamicOutputs = false
 Flag indicating if the model has dynamic output shape.
bool enableProfiling_ = false
 Flag to enable profiling for the engine.
Ort::MemoryInfo cpu_memory_info {nullptr}
Ort::MemoryInfo cuda_memory_info {nullptr}
OrtTensorRTProviderOptionsV2 * tensorrt_options = nullptr
 TensorRT provider options.
OrtCUDAProviderOptionsV2 * cuda_options = nullptr
 CUDA Provider options.
Ort::ModelMetadata metadata {nullptr}
size_t number_inputs = 0
size_t number_outputs = 0
 Count of input and output nodes.
std::vector< const char * > input_names
std::vector< const char * > output_names
 Names of input and output nodes.
std::vector< ONNXTensorElementDataType > input_dtypes
std::vector< ONNXTensorElementDataType > output_dtypes
 Data types of input and output nodes.
std::vector< std::vector< int64_t > > input_shapes
std::vector< std::vector< int64_t > > output_shapes
 Shapes of input and output nodes.
std::vector< size_t > input_tensors_dtype_bytes
std::vector< size_t > output_tensors_dtype_bytes
Ort::AllocatorWithDefaultOptions allocator
 Allocator for ONNX Runtime.
OrtAllocator * cpu_device_allocator = nullptr
OrtAllocator * cuda_device_allocator = nullptr
std::vector< Ort::Value > input_tensors
std::vector< Ort::Value > output_tensors
ExecutionProvider executionProvider
Precision precision
std::string sys_info_dump
std::string model_context_uuid
Ort::Value dummy_tensor {nullptr}
std::string tempDirectoryPath
LeipCommClient comm_
LicenseWithKey license_
std::vector< double > infer_durations = std::vector<double>(INFER_HISTORY_SIZE, 0.0)
size_t infer_index = 0
size_t infer_count = 0
size_t total_infer_count = 0
std::chrono::_V2::system_clock::time_point start_time
std::chrono::_V2::system_clock::time_point end_time

Static Private Attributes

static constexpr size_t INFER_HISTORY_SIZE = 10

Detailed Description

The LatentRuntimeEngine class provides a C++ interface to load and run ONNX models using ONNX Runtime.

This class abstracts the details of model loading, device configuration, and inference execution. It allows users to easily run ONNX models on various hardware with different configurations and precision settings.

Member Function Documentation

◆ autoSelectExecutionProvider()

void LatentRuntimeEngine::autoSelectExecutionProvider ( ExecutionProvider currentProvider)
private

Auto-selects the most appropriate execution provider based on system capabilities.

Parameters
currentProviderThe currently set execution provider
Returns
Selected ExecutionProvider based on system capabilities

◆ initLRE()

void LatentRuntimeEngine::initLRE ( std::vector< unsigned char > model)
private

Initializes the model for inference.

◆ configureTensorRTProvider()

void LatentRuntimeEngine::configureTensorRTProvider ( )
private

Configures TensorRT provider options.

◆ configureCUDAProvider()

void LatentRuntimeEngine::configureCUDAProvider ( )
private

Configures CUDA provider options.

◆ allocateIO()

void LatentRuntimeEngine::allocateIO ( bool onlyOutput)
private

◆ hasDynamicInputsOutputs()

void LatentRuntimeEngine::hasDynamicInputsOutputs ( Ort::Session & session)
private

◆ fetchInputNodeInfo()

void LatentRuntimeEngine::fetchInputNodeInfo ( )
private

Fetches and stores input node information.

◆ fetchOutputNodeInfo()

void LatentRuntimeEngine::fetchOutputNodeInfo ( )
private

Fetches and stores output node information.

◆ getAverageInferenceTimeMs()

double LatentRuntimeEngine::getAverageInferenceTimeMs ( ) const
private

◆ generateModelInit()

std::string LatentRuntimeEngine::generateModelInit ( )
private

◆ generateModelInferenceEvent()

std::string LatentRuntimeEngine::generateModelInferenceEvent ( )
private

Member Data Documentation

◆ config

Options OnnxLre::LatentRuntimeEngine::config
private

◆ env

Ort::Env OnnxLre::LatentRuntimeEngine::env
private

ONNX Runtime environment.

◆ sessionOptions

Ort::SessionOptions OnnxLre::LatentRuntimeEngine::sessionOptions
private

Session options for ONNX Runtime.

◆ session

Ort::Session OnnxLre::LatentRuntimeEngine::session {nullptr}
private

The ONNX Runtime session for model inference.

◆ io_binding

Ort::IoBinding OnnxLre::LatentRuntimeEngine::io_binding {nullptr}
private

◆ model_path

std::string OnnxLre::LatentRuntimeEngine::model_path
private

Path to the ONNX model file.

◆ isModelLoaded

bool OnnxLre::LatentRuntimeEngine::isModelLoaded = false
private

Flag indicating if the model is successfully loaded.

◆ gpuInput

bool OnnxLre::LatentRuntimeEngine::gpuInput = false
private

Flag indicating if the input tensors should be on GPU (true for CUDA and TensorRT)

◆ gpuOutput

bool OnnxLre::LatentRuntimeEngine::gpuOutput = false
private

Flag indicating if the output tensors would be on GPU (true for CUDA and TensorRT)

◆ graphQuantized

bool OnnxLre::LatentRuntimeEngine::graphQuantized = false
private

Flag indicating if the model is quantized.

◆ dynamicGraph

bool OnnxLre::LatentRuntimeEngine::dynamicGraph = false
private

Flag indicating if the model is dynamic (condition and loop)

◆ dynamicInputs

bool OnnxLre::LatentRuntimeEngine::dynamicInputs = false
private

Flag indicating if the model has dynamic input shape.

◆ dynamicOutputs

bool OnnxLre::LatentRuntimeEngine::dynamicOutputs = false
private

Flag indicating if the model has dynamic output shape.

◆ enableProfiling_

bool OnnxLre::LatentRuntimeEngine::enableProfiling_ = false
private

Flag to enable profiling for the engine.

◆ cpu_memory_info

Ort::MemoryInfo OnnxLre::LatentRuntimeEngine::cpu_memory_info {nullptr}
private

◆ cuda_memory_info

Ort::MemoryInfo OnnxLre::LatentRuntimeEngine::cuda_memory_info {nullptr}
private

◆ tensorrt_options

OrtTensorRTProviderOptionsV2* OnnxLre::LatentRuntimeEngine::tensorrt_options = nullptr
private

TensorRT provider options.

◆ cuda_options

OrtCUDAProviderOptionsV2* OnnxLre::LatentRuntimeEngine::cuda_options = nullptr
private

CUDA Provider options.

◆ metadata

Ort::ModelMetadata OnnxLre::LatentRuntimeEngine::metadata {nullptr}
private

◆ number_inputs

size_t OnnxLre::LatentRuntimeEngine::number_inputs = 0
private

◆ number_outputs

size_t OnnxLre::LatentRuntimeEngine::number_outputs = 0
private

Count of input and output nodes.

◆ input_names

std::vector<const char *> OnnxLre::LatentRuntimeEngine::input_names
private

◆ output_names

std::vector<const char *> OnnxLre::LatentRuntimeEngine::output_names
private

Names of input and output nodes.

◆ input_dtypes

std::vector<ONNXTensorElementDataType> OnnxLre::LatentRuntimeEngine::input_dtypes
private

◆ output_dtypes

std::vector<ONNXTensorElementDataType> OnnxLre::LatentRuntimeEngine::output_dtypes
private

Data types of input and output nodes.

◆ input_shapes

std::vector<std::vector<int64_t> > OnnxLre::LatentRuntimeEngine::input_shapes
private

◆ output_shapes

std::vector<std::vector<int64_t> > OnnxLre::LatentRuntimeEngine::output_shapes
private

Shapes of input and output nodes.

◆ input_tensors_dtype_bytes

std::vector<size_t> OnnxLre::LatentRuntimeEngine::input_tensors_dtype_bytes
private

◆ output_tensors_dtype_bytes

std::vector<size_t> OnnxLre::LatentRuntimeEngine::output_tensors_dtype_bytes
private

◆ allocator

Ort::AllocatorWithDefaultOptions OnnxLre::LatentRuntimeEngine::allocator
private

Allocator for ONNX Runtime.

◆ cpu_device_allocator

OrtAllocator* OnnxLre::LatentRuntimeEngine::cpu_device_allocator = nullptr
private

◆ cuda_device_allocator

OrtAllocator* OnnxLre::LatentRuntimeEngine::cuda_device_allocator = nullptr
private

◆ input_tensors

std::vector<Ort::Value> OnnxLre::LatentRuntimeEngine::input_tensors
private

◆ output_tensors

std::vector<Ort::Value> OnnxLre::LatentRuntimeEngine::output_tensors
private

◆ executionProvider

ExecutionProvider OnnxLre::LatentRuntimeEngine::executionProvider
private

◆ precision

Precision OnnxLre::LatentRuntimeEngine::precision
private

◆ sys_info_dump

std::string OnnxLre::LatentRuntimeEngine::sys_info_dump
private

◆ model_context_uuid

std::string OnnxLre::LatentRuntimeEngine::model_context_uuid
private

◆ dummy_tensor

Ort::Value OnnxLre::LatentRuntimeEngine::dummy_tensor {nullptr}
private

◆ tempDirectoryPath

std::string OnnxLre::LatentRuntimeEngine::tempDirectoryPath
private

◆ comm_

LeipCommClient OnnxLre::LatentRuntimeEngine::comm_
private

◆ license_

LicenseWithKey OnnxLre::LatentRuntimeEngine::license_
private

◆ INFER_HISTORY_SIZE

size_t OnnxLre::LatentRuntimeEngine::INFER_HISTORY_SIZE = 10
staticconstexprprivate

◆ infer_durations

std::vector<double> OnnxLre::LatentRuntimeEngine::infer_durations = std::vector<double>(INFER_HISTORY_SIZE, 0.0)
private

◆ infer_index

size_t OnnxLre::LatentRuntimeEngine::infer_index = 0
private

◆ infer_count

size_t OnnxLre::LatentRuntimeEngine::infer_count = 0
private

◆ total_infer_count

size_t OnnxLre::LatentRuntimeEngine::total_infer_count = 0
private

◆ start_time

std::chrono::_V2::system_clock::time_point OnnxLre::LatentRuntimeEngine::start_time
private

◆ end_time

std::chrono::_V2::system_clock::time_point OnnxLre::LatentRuntimeEngine::end_time
private

The documentation for this class was generated from the following files:
  • include/onnx_lre/onnx_lre.hpp
  • src/onnx_lre.cpp