ONNX-LRE
C++ API documentation
OnnxLre::LatentRuntimeEngine Class Reference

#include <onnx_lre.hpp>

Public Member Functions

 LatentRuntimeEngine (const std::string &modelPath, const Options &config=Options())
 Constructs a high-performance inference engine for the given ONNX model. More...
 
 ~LatentRuntimeEngine ()
 Releases all allocated resources. More...
 
size_t getNumberOfInputs () const
 Returns the number of input tensors required by the model. More...
 
size_t getNumberOfOutputs () const
 Returns the number of output tensors produced by the model. More...
 
const std::vector< const char * > & getInputNames () const
 Retrieves the names of all model input nodes. More...
 
const std::vector< const char * > & getOutputNames () const
 Retrieves the names of all model output nodes. More...
 
std::vector< std::string > getInputDTypes () const
 Gets the data types of all input tensors as strings. More...
 
std::vector< std::string > getOutputDTypes () const
 Gets the data types of all output tensors as strings. More...
 
const std::vector< std::vector< int64_t > > & getInputShapes () const
 Retrieves the dimensional shapes of all input tensors. More...
 
const std::vector< std::vector< int64_t > > & getOutputShapes () const
 Retrieves the dimensional shapes of all output tensors. More...
 
void infer (const std::vector< DLManagedTensor * > &t_input_data_vec)
 Performs inference using DLPack tensor inputs. More...
 
void infer (const std::vector< Ort::Value > &t_input_data_vec)
 Performs inference using ONNX Runtime tensor inputs. More...
 
void infer (const std::vector< void * > &t_input_data_vec, const std::vector< int64_t * > shape, const std::string device)
 Performs inference using raw memory pointers and shapes. More...
 
Ort::Value makeORTTensor (void *t_input_data_vec, const int64_t *shape, int input_index, const std::string &device)
 Creates an ONNX Runtime tensor from raw memory. More...
 
std::vector< DLManagedTensor * > getOutput ()
 Retrieves inference results as DLPack tensors. More...
 
std::vector< Ort::Value > getOutputOrt ()
 Retrieves and transfers ownership of inference results as ONNX Runtime tensors. More...
 
void setCPUOutput (bool use_cpu)
 Controls output tensor placement between device and host memory. More...
 
bool isCPUOutput ()
 Checks the current output tensor memory placement policy. More...
 
std::string getMetaValue (std::string key)
 Retrieves model metadata by key. More...
 

Private Member Functions

void initLRE (std::vector< unsigned char > model)
 Initializes the model for inference. More...
 
void configureTensorRTProvider ()
 Configures TensorRT provider options. More...
 
void configureCUDAProvider ()
 Configures CUDA provider options. More...
 
void fetchInputNodeInfo ()
 Fetches and stores input node information. More...
 
void fetchOutputNodeInfo ()
 Fetches and stores output node information. More...
 

Private Attributes

Options config
 
Ort::Env env
 ONNX Runtime environment. More...
 
Ort::SessionOptions sessionOptions
 Session options for ONNX Runtime. More...
 
Ort::Session session {nullptr}
 The ONNX Runtime session for model inference. More...
 
Ort::IoBinding io_binding {nullptr}
 
std::string model_path
 Path to the ONNX model file. More...
 
bool isModelLoaded = false
 Flag indicating if the model is successfully loaded. More...
 
bool gpuOutput = false
 Flag indicating if the output tensors would be on GPU (true for CUDA and TensorRT) More...
 
bool graphQuantized = false
 
bool trt_calib = false
 Flag for trt_calib available. More...
 
Ort::MemoryInfo cpu_memory_info {nullptr}
 
Ort::MemoryInfo cuda_memory_info {nullptr}
 
OrtTensorRTProviderOptionsV2 * tensorrt_options = nullptr
 TensorRT provider options. More...
 
OrtCUDAProviderOptionsV2 * cuda_options = nullptr
 CUDA Provider options. More...
 
Ort::ModelMetadata metadata {nullptr}
 
size_t number_inputs = 0
 
size_t number_outputs = 0
 Count of input and output nodes. More...
 
std::vector< const char * > input_names
 
std::vector< const char * > output_names
 Names of input and output nodes. More...
 
std::vector< ONNXTensorElementDataType > input_dtypes
 
std::vector< ONNXTensorElementDataType > output_dtypes
 Data types of input and output nodes. More...
 
std::vector< std::vector< int64_t > > input_shapes
 
std::vector< std::vector< int64_t > > output_shapes
 Shapes of input and output nodes. More...
 
std::vector< size_t > input_tensors_dtype_bytes
 
std::vector< size_t > output_tensors_dtype_bytes
 
Ort::AllocatorWithDefaultOptions allocator
 Allocator for ONNX Runtime. More...
 
std::vector< Ort::Value > input_tensors
 
std::vector< Ort::Value > output_tensors
 
ExecutionProvider executionProvider
 
Ort::Value dummy_tensor {nullptr}
 
std::string tempDirectoryPath
 
std::string calibration_file_path
 

Member Function Documentation

◆ initLRE()

void LatentRuntimeEngine::initLRE ( std::vector< unsigned char >  model)
private

Initializes the model for inference.

◆ configureTensorRTProvider()

void LatentRuntimeEngine::configureTensorRTProvider ( )
private

Configures TensorRT provider options.

◆ configureCUDAProvider()

void LatentRuntimeEngine::configureCUDAProvider ( )
private

Configures CUDA provider options.

◆ fetchInputNodeInfo()

void LatentRuntimeEngine::fetchInputNodeInfo ( )
private

Fetches and stores input node information.

◆ fetchOutputNodeInfo()

void LatentRuntimeEngine::fetchOutputNodeInfo ( )
private

Fetches and stores output node information.

Member Data Documentation

◆ config

Options OnnxLre::LatentRuntimeEngine::config
private

◆ env

Ort::Env OnnxLre::LatentRuntimeEngine::env
private

ONNX Runtime environment.

◆ sessionOptions

Ort::SessionOptions OnnxLre::LatentRuntimeEngine::sessionOptions
private

Session options for ONNX Runtime.

◆ session

Ort::Session OnnxLre::LatentRuntimeEngine::session {nullptr}
private

The ONNX Runtime session for model inference.

◆ io_binding

Ort::IoBinding OnnxLre::LatentRuntimeEngine::io_binding {nullptr}
private

◆ model_path

std::string OnnxLre::LatentRuntimeEngine::model_path
private

Path to the ONNX model file.

◆ isModelLoaded

bool OnnxLre::LatentRuntimeEngine::isModelLoaded = false
private

Flag indicating if the model is successfully loaded.

◆ gpuOutput

bool OnnxLre::LatentRuntimeEngine::gpuOutput = false
private

Flag indicating if the output tensors would be on GPU (true for CUDA and TensorRT)

◆ graphQuantized

bool OnnxLre::LatentRuntimeEngine::graphQuantized = false
private

◆ trt_calib

bool OnnxLre::LatentRuntimeEngine::trt_calib = false
private

Flag for trt_calib available.

◆ cpu_memory_info

Ort::MemoryInfo OnnxLre::LatentRuntimeEngine::cpu_memory_info {nullptr}
private

◆ cuda_memory_info

Ort::MemoryInfo OnnxLre::LatentRuntimeEngine::cuda_memory_info {nullptr}
private

◆ tensorrt_options

OrtTensorRTProviderOptionsV2* OnnxLre::LatentRuntimeEngine::tensorrt_options = nullptr
private

TensorRT provider options.

◆ cuda_options

OrtCUDAProviderOptionsV2* OnnxLre::LatentRuntimeEngine::cuda_options = nullptr
private

CUDA Provider options.

◆ metadata

Ort::ModelMetadata OnnxLre::LatentRuntimeEngine::metadata {nullptr}
private

◆ number_inputs

size_t OnnxLre::LatentRuntimeEngine::number_inputs = 0
private

◆ number_outputs

size_t OnnxLre::LatentRuntimeEngine::number_outputs = 0
private

Count of input and output nodes.

◆ input_names

std::vector<const char *> OnnxLre::LatentRuntimeEngine::input_names
private

◆ output_names

std::vector<const char *> OnnxLre::LatentRuntimeEngine::output_names
private

Names of input and output nodes.

◆ input_dtypes

std::vector<ONNXTensorElementDataType> OnnxLre::LatentRuntimeEngine::input_dtypes
private

◆ output_dtypes

std::vector<ONNXTensorElementDataType> OnnxLre::LatentRuntimeEngine::output_dtypes
private

Data types of input and output nodes.

◆ input_shapes

std::vector<std::vector<int64_t> > OnnxLre::LatentRuntimeEngine::input_shapes
private

◆ output_shapes

std::vector<std::vector<int64_t> > OnnxLre::LatentRuntimeEngine::output_shapes
private

Shapes of input and output nodes.

◆ input_tensors_dtype_bytes

std::vector<size_t> OnnxLre::LatentRuntimeEngine::input_tensors_dtype_bytes
private

◆ output_tensors_dtype_bytes

std::vector<size_t> OnnxLre::LatentRuntimeEngine::output_tensors_dtype_bytes
private

◆ allocator

Ort::AllocatorWithDefaultOptions OnnxLre::LatentRuntimeEngine::allocator
private

Allocator for ONNX Runtime.

◆ input_tensors

std::vector<Ort::Value> OnnxLre::LatentRuntimeEngine::input_tensors
private

◆ output_tensors

std::vector<Ort::Value> OnnxLre::LatentRuntimeEngine::output_tensors
private

◆ executionProvider

ExecutionProvider OnnxLre::LatentRuntimeEngine::executionProvider
private

◆ dummy_tensor

Ort::Value OnnxLre::LatentRuntimeEngine::dummy_tensor {nullptr}
private

◆ tempDirectoryPath

std::string OnnxLre::LatentRuntimeEngine::tempDirectoryPath
private

◆ calibration_file_path

std::string OnnxLre::LatentRuntimeEngine::calibration_file_path
private

The documentation for this class was generated from the following files: