This program loads an ONNX model, builds a TensorRT engine from it, runs a random dummy input through the engine, and prints the indices of the top 5 predicted classes.
import sys
import tensorrt as trt
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit  # creates a CUDA context on import
# Logger for TensorRT
logger = trt.Logger(trt.Logger.WARNING)
# Build TensorRT engine from ONNX model. The ONNX parser (and execute_async_v2
# below) requires an explicit-batch network.
builder = trt.Builder(logger)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, logger)
with open('model.onnx', 'rb') as model_file:
    if not parser.parse(model_file.read()):
        print('Failed to parse ONNX model')
        for i in range(parser.num_errors):
            print(parser.get_error(i))
        sys.exit(1)
# Configure the build. With an explicit-batch network the batch size is fixed
# by the model, so only the workspace limit needs to be set.
config = builder.create_builder_config()
config.max_workspace_size = 1 << 30  # 1 GiB of scratch space for tactic selection
engine = builder.build_engine(network, config)
if engine is None:
    print('Failed to build TensorRT engine')
    sys.exit(1)
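# Optionally cache the built engine so later runs can skip the slow build step
# (the 'model.engine' filename here is an arbitrary choice):
with open('model.engine', 'wb') as f:
    f.write(engine.serialize())
# A later run could then reload it instead of rebuilding:
#   runtime = trt.Runtime(logger)
#   with open('model.engine', 'rb') as f:
#       engine = runtime.deserialize_cuda_engine(f.read())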
# Create execution context
context = engine.create_execution_context()
# Prepare dummy input data
input_shape = (1, 3, 224, 224)  # must match the model's input: (batch, channels, height, width)
input_data = np.random.random(input_shape).astype(np.float32)
# Allocate device memory
d_input = cuda.mem_alloc(input_data.nbytes)
output_shape = (1, 1000)  # must match the model's output, e.g. 1000 ImageNet classes
output_data = np.empty(output_shape, dtype=np.float32)
d_output = cuda.mem_alloc(output_data.nbytes)
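# For the async copies below to be truly asynchronous, the host buffers should
# be page-locked, e.g. output_data = cuda.pagelocked_empty(output_shape, dtype=np.float32)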
# Create CUDA stream
stream = cuda.Stream()
# Transfer input data to device
cuda.memcpy_htod_async(d_input, input_data, stream)
# Run inference (bindings must follow the engine's binding order: input, then output)
context.execute_async_v2(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
# Transfer predictions back
cuda.memcpy_dtoh_async(output_data, d_output, stream)
# Synchronize stream
stream.synchronize()
# Print top 5 predictions
top5 = output_data[0].argsort()[-5:][::-1]
print('Top 5 predicted class indices:', top5)
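Most classifiers exported to ONNX emit raw logits rather than probabilities. If you want confidence scores alongside the indices, a numerically stable softmax converts the logits into probabilities; a small follow-up sketch, assuming the model has no final softmax layer:
# Convert logits to probabilities with a numerically stable softmax
logits = output_data[0]
probs = np.exp(logits - logits.max())
probs /= probs.sum()
for idx in top5:
    print(f'class {idx}: probability {probs[idx]:.4f}')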