ONNX-LRE
C++ API documentation
OnnxLre::LatentRuntimeEngine Class Reference

#include <onnx_lre.hpp>

Public Member Functions

 LatentRuntimeEngine (const std::string &modelPath, const Options &config=Options())
 Constructs a high-performance inference engine for the given ONNX model. More...
 
 ~LatentRuntimeEngine ()
 Releases all allocated resources. More...
 
size_t getNumberOfInputs () const
 Returns the number of input tensors required by the model. More...
 
size_t getNumberOfOutputs () const
 Returns the number of output tensors produced by the model. More...
 
const std::vector< const char * > & getInputNames () const
 Retrieves the names of all model input nodes. More...
 
const std::vector< const char * > & getOutputNames () const
 Retrieves the names of all model output nodes. More...
 
std::vector< std::string > getInputDTypes () const
 Gets the data types of all input tensors as strings. More...
 
std::vector< std::string > getOutputDTypes () const
 Gets the data types of all output tensors as strings. More...
 
const std::vector< std::vector< int64_t > > & getInputShapes () const
 Retrieves the dimensional shapes of all input tensors. More...
 
const std::vector< std::vector< int64_t > > & getOutputShapes () const
 Retrieves the dimensional shapes of all output tensors. More...
 
void infer (const std::vector< DLManagedTensor * > &t_input_data_vec)
 Performs inference using DLPack tensor inputs. More...
 
void infer (const std::vector< Ort::Value > &t_input_data_vec)
 Performs inference using ONNX Runtime tensor inputs. More...
 
void infer (const std::vector< void * > &t_input_data_vec, const std::vector< int64_t * > shape, const std::string device)
 Performs inference using raw memory pointers and shapes. More...
 
Ort::Value makeORTTensor (void *t_input_data_vec, const int64_t *shape, int input_index, const std::string &device)
 Creates an ONNX Runtime tensor from raw memory. More...
 
std::vector< DLManagedTensor * > getOutput ()
 Retrieves inference results as DLPack tensors. More...
 
std::vector< Ort::Value > getOutputOrt ()
 Retrieves and transfers ownership of inference results as ONNX Runtime tensors. More...
 
void setCPUOutput (bool use_cpu)
 Controls output tensor placement between device and host memory. More...
 
bool isCPUOutput ()
 Checks the current output tensor memory placement policy. More...
 
std::string getMetaValue (std::string key)
 Retrieves model metadata by key. More...
 

Private Member Functions

void initLRE (std::vector< unsigned char > model)
 Initializes the model for inference. More...
 
void configureTensorRTProvider ()
 Configures TensorRT provider options. More...
 
void configureCUDAProvider ()
 Configures CUDA provider options. More...
 
void fetchInputNodeInfo ()
 Fetches and stores input node information. More...
 
void fetchOutputNodeInfo ()
 Fetches and stores output node information. More...
 

Private Attributes

Options config
 
Ort::Env env
 ONNX Runtime environment. More...
 
Ort::SessionOptions sessionOptions
 Session options for ONNX Runtime. More...
 
Ort::Session session {nullptr}
 The ONNX Runtime session for model inference. More...
 
Ort::IoBinding io_binding {nullptr}
 
std::string model_path
 Path to the ONNX model file. More...
 
bool isModelLoaded = false
 Flag indicating if the model is successfully loaded. More...
 
bool gpuOutput = false
 Flag indicating if the output tensors would be on GPU (true for CUDA and TensorRT) More...
 
bool graphQuantized = false
 
bool trt_calib = false
 Flag for trt_calib available. More...
 
Ort::MemoryInfo cpu_memory_info {nullptr}
 
Ort::MemoryInfo cuda_memory_info {nullptr}
 
OrtTensorRTProviderOptionsV2 * tensorrt_options = nullptr
 TensorRT provider options. More...
 
OrtCUDAProviderOptionsV2 * cuda_options = nullptr
 CUDA Provider options. More...
 
Ort::ModelMetadata metadata {nullptr}
 
size_t number_inputs = 0
 
size_t number_outputs = 0
 Count of input and output nodes. More...
 
std::vector< const char * > input_names
 
std::vector< const char * > output_names
 Names of input and output nodes. More...
 
std::vector< ONNXTensorElementDataType > input_dtypes
 
std::vector< ONNXTensorElementDataType > output_dtypes
 Data types of input and output nodes. More...
 
std::vector< std::vector< int64_t > > input_shapes
 
std::vector< std::vector< int64_t > > output_shapes
 Shapes of input and output nodes. More...
 
std::vector< size_t > input_tensors_dtype_bytes
 
std::vector< size_t > output_tensors_dtype_bytes
 
Ort::AllocatorWithDefaultOptions allocator
 Allocator for ONNX Runtime. More...
 
std::vector< Ort::Value > input_tensors
 
std::vector< Ort::Value > output_tensors
 
ExecutionProvider executionProvider
 
Ort::Value dummy_tensor {nullptr}
 
std::string tempDirectoryPath
 
std::string calibration_file_path
 

Member Function Documentation

◆ initLRE()

void LatentRuntimeEngine::initLRE ( std::vector< unsigned char >  model)
private

Initializes the model for inference.

◆ configureTensorRTProvider()

void LatentRuntimeEngine::configureTensorRTProvider ( )
private

Configures TensorRT provider options.

◆ configureCUDAProvider()

void LatentRuntimeEngine::configureCUDAProvider ( )
private

Configures CUDA provider options.

◆ fetchInputNodeInfo()

void LatentRuntimeEngine::fetchInputNodeInfo ( )
private

Fetches and stores input node information.

◆ fetchOutputNodeInfo()

void LatentRuntimeEngine::fetchOutputNodeInfo ( )
private

Fetches and stores output node information.

Member Data Documentation

◆ config

Options OnnxLre::LatentRuntimeEngine::config
private

◆ env

Ort::Env OnnxLre::LatentRuntimeEngine::env
private

ONNX Runtime environment.

◆ sessionOptions

Ort::SessionOptions OnnxLre::LatentRuntimeEngine::sessionOptions
private

Session options for ONNX Runtime.

◆ session

Ort::Session OnnxLre::LatentRuntimeEngine::session {nullptr}
private

The ONNX Runtime session for model inference.

◆ io_binding

Ort::IoBinding OnnxLre::LatentRuntimeEngine::io_binding {nullptr}
private

◆ model_path

std::string OnnxLre::LatentRuntimeEngine::model_path
private

Path to the ONNX model file.

◆ isModelLoaded

bool OnnxLre::LatentRuntimeEngine::isModelLoaded = false
private

Flag indicating if the model is successfully loaded.

◆ gpuOutput

bool OnnxLre::LatentRuntimeEngine::gpuOutput = false
private

Flag indicating if the output tensors would be on GPU (true for CUDA and TensorRT)

◆ graphQuantized

bool OnnxLre::LatentRuntimeEngine::graphQuantized = false
private

◆ trt_calib

bool OnnxLre::LatentRuntimeEngine::trt_calib = false
private

Flag for trt_calib available.

◆ cpu_memory_info

Ort::MemoryInfo OnnxLre::LatentRuntimeEngine::cpu_memory_info {nullptr}
private

◆ cuda_memory_info

Ort::MemoryInfo OnnxLre::LatentRuntimeEngine::cuda_memory_info {nullptr}
private

◆ tensorrt_options

OrtTensorRTProviderOptionsV2* OnnxLre::LatentRuntimeEngine::tensorrt_options = nullptr
private

TensorRT provider options.

◆ cuda_options

OrtCUDAProviderOptionsV2* OnnxLre::LatentRuntimeEngine::cuda_options = nullptr
private

CUDA Provider options.

◆ metadata

Ort::ModelMetadata OnnxLre::LatentRuntimeEngine::metadata {nullptr}
private

◆ number_inputs

size_t OnnxLre::LatentRuntimeEngine::number_inputs = 0
private

◆ number_outputs

size_t OnnxLre::LatentRuntimeEngine::number_outputs = 0
private

Count of input and output nodes.

◆ input_names

std::vector<const char *> OnnxLre::LatentRuntimeEngine::input_names
private

◆ output_names

std::vector<const char *> OnnxLre::LatentRuntimeEngine::output_names
private

Names of input and output nodes.

◆ input_dtypes

std::vector<ONNXTensorElementDataType> OnnxLre::LatentRuntimeEngine::input_dtypes
private

◆ output_dtypes

std::vector<ONNXTensorElementDataType> OnnxLre::LatentRuntimeEngine::output_dtypes
private

Data types of input and output nodes.

◆ input_shapes

std::vector<std::vector<int64_t> > OnnxLre::LatentRuntimeEngine::input_shapes
private

◆ output_shapes

std::vector<std::vector<int64_t> > OnnxLre::LatentRuntimeEngine::output_shapes
private

Shapes of input and output nodes.

◆ input_tensors_dtype_bytes

std::vector<size_t> OnnxLre::LatentRuntimeEngine::input_tensors_dtype_bytes
private

◆ output_tensors_dtype_bytes

std::vector<size_t> OnnxLre::LatentRuntimeEngine::output_tensors_dtype_bytes
private

◆ allocator

Ort::AllocatorWithDefaultOptions OnnxLre::LatentRuntimeEngine::allocator
private

Allocator for ONNX Runtime.

◆ input_tensors

std::vector<Ort::Value> OnnxLre::LatentRuntimeEngine::input_tensors
private

◆ output_tensors

std::vector<Ort::Value> OnnxLre::LatentRuntimeEngine::output_tensors
private

◆ executionProvider

ExecutionProvider OnnxLre::LatentRuntimeEngine::executionProvider
private

◆ dummy_tensor

Ort::Value OnnxLre::LatentRuntimeEngine::dummy_tensor {nullptr}
private

◆ tempDirectoryPath

std::string OnnxLre::LatentRuntimeEngine::tempDirectoryPath
private

◆ calibration_file_path

std::string OnnxLre::LatentRuntimeEngine::calibration_file_path
private

The documentation for this class was generated from the following files: