ONNX-LRE
C++ API documentation
Loading...
Searching...
No Matches
OnnxLre::LatentRuntimeEngine Class Reference

The LatentRuntimeEngine class provides a C++ interface to load and run ONNX models using ONNX Runtime. More...

#include <onnx_lre.hpp>

Public Member Functions

 LatentRuntimeEngine (const std::string &modelPath, const Options &config=Options())
 Constructs the inference engine, loading the ONNX model at modelPath with the given options.
 ~LatentRuntimeEngine ()
 Releases all allocated resources.
size_t getNumberOfInputs () const
 Returns the number of input tensors required by the model.
size_t getNumberOfOutputs () const
 Returns the number of output tensors produced by the model.
const std::vector< const char * > & getInputNames () const
 Retrieves the names of all model input nodes.
const std::vector< const char * > & getOutputNames () const
 Retrieves the names of all model output nodes.
std::vector< std::string > getInputDTypes () const
 Gets the data types of all input tensors as strings.
std::vector< std::string > getOutputDTypes () const
 Gets the data types of all output tensors as strings.
const std::vector< std::vector< int64_t > > & getInputShapes () const
 Retrieves the dimensional shapes of all input tensors.
const std::vector< std::vector< int64_t > > & getOutputShapes () const
 Retrieves the dimensional shapes of all output tensors.
std::string getExecutionProvider () const
 Returns the currently active execution provider.
void infer (const std::vector< DLManagedTensor * > &t_input_data_vec)
 Performs inference using DLPack tensor inputs.
void infer (const std::vector< Ort::Value > &t_input_data_vec)
 Performs inference using ONNX Runtime tensor inputs.
void infer (const std::vector< void * > &t_input_data_vec, const std::vector< int64_t * > shape, const std::string device)
 Performs inference using raw memory pointers and shapes.
Ort::Value makeORTTensor (void *t_input_data_vec, const int64_t *shape, int input_index, const std::string &device)
 Creates an ONNX Runtime tensor from raw memory.
std::vector< DLManagedTensor * > getOutput ()
 Retrieves inference results as DLPack tensors.
std::vector< Ort::Value > getOutputOrt ()
 Retrieves and transfers ownership of inference results as ONNX Runtime tensors.
void setCPUOutput (bool use_cpu)
 Controls output tensor placement between device and host memory.
bool isCPUOutput ()
 Checks the current output tensor memory placement policy.
std::string getMetaValue (std::string key)
 Retrieves model metadata by key.
std::string getVersion ()
 Retrieves the ONNX LRE library version at runtime.

Private Member Functions

void autoSelectExecutionProvider (ExecutionProvider currentProvider)
 Auto-selects the most appropriate execution provider based on system capabilities.
void initLRE (std::vector< unsigned char > model)
 Initializes the model for inference.
void configureTensorRTProvider ()
 Configures TensorRT provider options.
void configureCUDAProvider ()
 Configures CUDA provider options.
void allocateIO (bool onlyOutput)
void hasDynamicInputsOutputs (Ort::Session &session)
void fetchInputNodeInfo ()
 Fetches and stores input node information.
void fetchOutputNodeInfo ()
 Fetches and stores output node information.
double getAverageInferenceTimeMs () const
std::string generateModelInit ()
std::string generateModelInferenceEvent ()

Private Attributes

Options config
Ort::Env env
 ONNX Runtime environment.
Ort::SessionOptions sessionOptions
 Session options for ONNX Runtime.
Ort::Session session {nullptr}
 The ONNX Runtime session for model inference.
Ort::IoBinding io_binding {nullptr}
std::string model_path
 Path to the ONNX model file.
bool isModelLoaded = false
 Flag indicating if the model is successfully loaded.
bool gpuInput = false
 Flag indicating if the input tensors should be on GPU (true for CUDA and TensorRT)
bool gpuOutput = false
 Flag indicating if the output tensors would be on GPU (true for CUDA and TensorRT)
bool graphQuantized = false
 Flag indicating if the model is quantized.
bool dynamicGraph = false
 Flag indicating if the model is dynamic (condition and loop)
bool dynamicInputs = false
 Flag indicating if the model has dynamic input shape.
bool dynamicOutputs = false
 Flag indicating if the model has dynamic output shape.
bool enableProfiling_ = false
 Flag to enable profiling for the engine.
Ort::MemoryInfo cpu_memory_info {nullptr}
Ort::MemoryInfo cuda_memory_info {nullptr}
OrtTensorRTProviderOptionsV2 * tensorrt_options = nullptr
 TensorRT provider options.
OrtCUDAProviderOptionsV2 * cuda_options = nullptr
 CUDA Provider options.
Ort::ModelMetadata metadata {nullptr}
size_t number_inputs = 0
size_t number_outputs = 0
 Count of input and output nodes.
std::vector< const char * > input_names
std::vector< const char * > output_names
 Names of input and output nodes.
std::vector< ONNXTensorElementDataType > input_dtypes
std::vector< ONNXTensorElementDataType > output_dtypes
 Data types of input and output nodes.
std::vector< std::vector< int64_t > > input_shapes
std::vector< std::vector< int64_t > > output_shapes
 Shapes of input and output nodes.
std::vector< size_t > input_tensors_dtype_bytes
std::vector< size_t > output_tensors_dtype_bytes
Ort::AllocatorWithDefaultOptions allocator
 Allocator for ONNX Runtime.
OrtAllocator * cpu_device_allocator = nullptr
OrtAllocator * cuda_device_allocator = nullptr
std::vector< Ort::Value > input_tensors
std::vector< Ort::Value > output_tensors
ExecutionProvider executionProvider
Precision precision
std::string sys_info_dump
std::string model_context_uuid
Ort::Value dummy_tensor {nullptr}
std::string tempDirectoryPath
LeipCommClient comm_
LicenseWithKey license_
std::vector< double > infer_durations = std::vector<double>(INFER_HISTORY_SIZE, 0.0)
size_t infer_index = 0
size_t infer_count = 0
size_t total_infer_count = 0
std::chrono::_V2::system_clock::time_point start_time
std::chrono::_V2::system_clock::time_point end_time

Static Private Attributes

static constexpr size_t INFER_HISTORY_SIZE = 10

Detailed Description

The LatentRuntimeEngine class provides a C++ interface to load and run ONNX models using ONNX Runtime.

This class abstracts the details of model loading, device configuration, and inference execution. It allows users to easily run ONNX models on various hardware with different configurations and precision settings.

Member Function Documentation

◆ autoSelectExecutionProvider()

void LatentRuntimeEngine::autoSelectExecutionProvider ( ExecutionProvider currentProvider)
private

Auto-selects the most appropriate execution provider based on system capabilities.

Parameters
currentProviderThe currently set execution provider
Returns
Selected ExecutionProvider based on system capabilities

◆ initLRE()

void LatentRuntimeEngine::initLRE ( std::vector< unsigned char > model)
private

Initializes the model for inference.

◆ configureTensorRTProvider()

void LatentRuntimeEngine::configureTensorRTProvider ( )
private

Configures TensorRT provider options.

◆ configureCUDAProvider()

void LatentRuntimeEngine::configureCUDAProvider ( )
private

Configures CUDA provider options.

◆ allocateIO()

void LatentRuntimeEngine::allocateIO ( bool onlyOutput)
private

◆ hasDynamicInputsOutputs()

void LatentRuntimeEngine::hasDynamicInputsOutputs ( Ort::Session & session)
private

◆ fetchInputNodeInfo()

void LatentRuntimeEngine::fetchInputNodeInfo ( )
private

Fetches and stores input node information.

◆ fetchOutputNodeInfo()

void LatentRuntimeEngine::fetchOutputNodeInfo ( )
private

Fetches and stores output node information.

◆ getAverageInferenceTimeMs()

double LatentRuntimeEngine::getAverageInferenceTimeMs ( ) const
private

◆ generateModelInit()

std::string LatentRuntimeEngine::generateModelInit ( )
private

◆ generateModelInferenceEvent()

std::string LatentRuntimeEngine::generateModelInferenceEvent ( )
private

Member Data Documentation

◆ config

Options OnnxLre::LatentRuntimeEngine::config
private

◆ env

Ort::Env OnnxLre::LatentRuntimeEngine::env
private

ONNX Runtime environment.

◆ sessionOptions

Ort::SessionOptions OnnxLre::LatentRuntimeEngine::sessionOptions
private

Session options for ONNX Runtime.

◆ session

Ort::Session OnnxLre::LatentRuntimeEngine::session {nullptr}
private

The ONNX Runtime session for model inference.

◆ io_binding

Ort::IoBinding OnnxLre::LatentRuntimeEngine::io_binding {nullptr}
private

◆ model_path

std::string OnnxLre::LatentRuntimeEngine::model_path
private

Path to the ONNX model file.

◆ isModelLoaded

bool OnnxLre::LatentRuntimeEngine::isModelLoaded = false
private

Flag indicating if the model is successfully loaded.

◆ gpuInput

bool OnnxLre::LatentRuntimeEngine::gpuInput = false
private

Flag indicating if the input tensors should be on GPU (true for CUDA and TensorRT)

◆ gpuOutput

bool OnnxLre::LatentRuntimeEngine::gpuOutput = false
private

Flag indicating if the output tensors would be on GPU (true for CUDA and TensorRT)

◆ graphQuantized

bool OnnxLre::LatentRuntimeEngine::graphQuantized = false
private

Flag indicating if the model is quantized.

◆ dynamicGraph

bool OnnxLre::LatentRuntimeEngine::dynamicGraph = false
private

Flag indicating if the model is dynamic (condition and loop)

◆ dynamicInputs

bool OnnxLre::LatentRuntimeEngine::dynamicInputs = false
private

Flag indicating if the model has dynamic input shape.

◆ dynamicOutputs

bool OnnxLre::LatentRuntimeEngine::dynamicOutputs = false
private

Flag indicating if the model has dynamic output shape.

◆ enableProfiling_

bool OnnxLre::LatentRuntimeEngine::enableProfiling_ = false
private

Flag to enable profiling for the engine.

◆ cpu_memory_info

Ort::MemoryInfo OnnxLre::LatentRuntimeEngine::cpu_memory_info {nullptr}
private

◆ cuda_memory_info

Ort::MemoryInfo OnnxLre::LatentRuntimeEngine::cuda_memory_info {nullptr}
private

◆ tensorrt_options

OrtTensorRTProviderOptionsV2* OnnxLre::LatentRuntimeEngine::tensorrt_options = nullptr
private

TensorRT provider options.

◆ cuda_options

OrtCUDAProviderOptionsV2* OnnxLre::LatentRuntimeEngine::cuda_options = nullptr
private

CUDA Provider options.

◆ metadata

Ort::ModelMetadata OnnxLre::LatentRuntimeEngine::metadata {nullptr}
private

◆ number_inputs

size_t OnnxLre::LatentRuntimeEngine::number_inputs = 0
private

◆ number_outputs

size_t OnnxLre::LatentRuntimeEngine::number_outputs = 0
private

Count of input and output nodes.

◆ input_names

std::vector<const char *> OnnxLre::LatentRuntimeEngine::input_names
private

◆ output_names

std::vector<const char *> OnnxLre::LatentRuntimeEngine::output_names
private

Names of input and output nodes.

◆ input_dtypes

std::vector<ONNXTensorElementDataType> OnnxLre::LatentRuntimeEngine::input_dtypes
private

◆ output_dtypes

std::vector<ONNXTensorElementDataType> OnnxLre::LatentRuntimeEngine::output_dtypes
private

Data types of input and output nodes.

◆ input_shapes

std::vector<std::vector<int64_t> > OnnxLre::LatentRuntimeEngine::input_shapes
private

◆ output_shapes

std::vector<std::vector<int64_t> > OnnxLre::LatentRuntimeEngine::output_shapes
private

Shapes of input and output nodes.

◆ input_tensors_dtype_bytes

std::vector<size_t> OnnxLre::LatentRuntimeEngine::input_tensors_dtype_bytes
private

◆ output_tensors_dtype_bytes

std::vector<size_t> OnnxLre::LatentRuntimeEngine::output_tensors_dtype_bytes
private

◆ allocator

Ort::AllocatorWithDefaultOptions OnnxLre::LatentRuntimeEngine::allocator
private

Allocator for ONNX Runtime.

◆ cpu_device_allocator

OrtAllocator* OnnxLre::LatentRuntimeEngine::cpu_device_allocator = nullptr
private

◆ cuda_device_allocator

OrtAllocator* OnnxLre::LatentRuntimeEngine::cuda_device_allocator = nullptr
private

◆ input_tensors

std::vector<Ort::Value> OnnxLre::LatentRuntimeEngine::input_tensors
private

◆ output_tensors

std::vector<Ort::Value> OnnxLre::LatentRuntimeEngine::output_tensors
private

◆ executionProvider

ExecutionProvider OnnxLre::LatentRuntimeEngine::executionProvider
private

◆ precision

Precision OnnxLre::LatentRuntimeEngine::precision
private

◆ sys_info_dump

std::string OnnxLre::LatentRuntimeEngine::sys_info_dump
private

◆ model_context_uuid

std::string OnnxLre::LatentRuntimeEngine::model_context_uuid
private

◆ dummy_tensor

Ort::Value OnnxLre::LatentRuntimeEngine::dummy_tensor {nullptr}
private

◆ tempDirectoryPath

std::string OnnxLre::LatentRuntimeEngine::tempDirectoryPath
private

◆ comm_

LeipCommClient OnnxLre::LatentRuntimeEngine::comm_
private

◆ license_

LicenseWithKey OnnxLre::LatentRuntimeEngine::license_
private

◆ INFER_HISTORY_SIZE

size_t OnnxLre::LatentRuntimeEngine::INFER_HISTORY_SIZE = 10
staticconstexprprivate

◆ infer_durations

std::vector<double> OnnxLre::LatentRuntimeEngine::infer_durations = std::vector<double>(INFER_HISTORY_SIZE, 0.0)
private

◆ infer_index

size_t OnnxLre::LatentRuntimeEngine::infer_index = 0
private

◆ infer_count

size_t OnnxLre::LatentRuntimeEngine::infer_count = 0
private

◆ total_infer_count

size_t OnnxLre::LatentRuntimeEngine::total_infer_count = 0
private

◆ start_time

std::chrono::_V2::system_clock::time_point OnnxLre::LatentRuntimeEngine::start_time
private

◆ end_time

std::chrono::_V2::system_clock::time_point OnnxLre::LatentRuntimeEngine::end_time
private

The documentation for this class was generated from the following files:
  • include/onnx_lre/onnx_lre.hpp
  • src/onnx_lre.cpp