Introduction
TensorRT acceleration helps your AI models run faster and use less power, making them better for real-time tasks like video or image recognition.
Jump into concepts and practice - no test required
import tensorrt as trt

# Create a logger
logger = trt.Logger(trt.Logger.WARNING)

# Create builder and network
builder = trt.Builder(logger)
# The network is handed to the ONNX parser below, which works on
# explicit-batch networks, so request that mode when creating it.
network = builder.create_network(
    1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
)

# Parse your model (e.g., ONNX) and build engine
parser = trt.OnnxParser(network, logger)
with open('model.onnx', 'rb') as model_file:
    parsed_ok = parser.parse(model_file.read())
if not parsed_ok:
    # parse() signals failure through its return value; print what the
    # parser recorded and stop instead of building from a broken network.
    for index in range(parser.num_errors):
        print(parser.get_error(index))
    raise SystemExit('Failed to parse ONNX model')

# NOTE(review): build_cuda_engine belongs to the legacy builder API; newer
# TensorRT releases build through a builder config - confirm against the
# installed version.
engine = builder.build_cuda_engine(network)
# Minimal ONNX -> TensorRT engine build: logger, builder, network, parser,
# then the engine itself.
import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)

# Explicit-batch mode is requested because the network is populated by the
# ONNX parser.
explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
network = builder.create_network(explicit_batch)
parser = trt.OnnxParser(network, logger)

with open('model.onnx', 'rb') as f:
    model_bytes = f.read()

# Do not continue to the build step when parsing fails; report the parser's
# recorded errors first.
if not parser.parse(model_bytes):
    for i in range(parser.num_errors):
        print(parser.get_error(i))
    raise SystemExit('Failed to parse ONNX model')

engine = builder.build_cuda_engine(network)
# Build-time limits, set on the builder before the engine is created.
# `builder` and `network` are not defined in this fragment; presumably they
# are the objects created in the preceding setup snippet.
builder.max_batch_size = 1
builder.max_workspace_size = 1 << 30  # 1GB
engine = builder.build_cuda_engine(network)
# End-to-end example: build a TensorRT engine from an ONNX file, run one
# inference on random input, and print the five highest-scoring class indices.
import tensorrt as trt
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit  # noqa: F401 - imported only for its import-time CUDA setup

# Logger for TensorRT
logger = trt.Logger(trt.Logger.WARNING)

# Build TensorRT engine from ONNX model
builder = trt.Builder(logger)
# Explicit-batch mode: the network is filled by the ONNX parser and later run
# with execute_async_v2, both of which operate on explicit-batch networks.
network = builder.create_network(
    1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
)
parser = trt.OnnxParser(network, logger)
with open('model.onnx', 'rb') as model_file:
    parsed_ok = parser.parse(model_file.read())
if not parsed_ok:
    print('Failed to parse ONNX model')
    for error in range(parser.num_errors):
        print(parser.get_error(error))
    # SystemExit is raised directly; the bare exit() helper is injected by
    # the site module and is meant for interactive sessions.
    raise SystemExit(1)

builder.max_batch_size = 1
builder.max_workspace_size = 1 << 30  # 1GB
engine = builder.build_cuda_engine(network)
if engine is None:
    # Assumes a failed build yields None (TODO confirm for the installed
    # version); report it here rather than as an AttributeError below.
    raise SystemExit('Failed to build TensorRT engine')

# Create execution context
context = engine.create_execution_context()

# Prepare dummy input data
input_shape = (1, 3, 224, 224)  # Example input shape
input_data = np.random.random(input_shape).astype(np.float32)

# Allocate device memory
d_input = cuda.mem_alloc(input_data.nbytes)
output_shape = (1, 1000)  # Example output shape for classification
output_data = np.empty(output_shape, dtype=np.float32)
d_output = cuda.mem_alloc(output_data.nbytes)

# Create CUDA stream
stream = cuda.Stream()

# Transfer input data to device
cuda.memcpy_htod_async(d_input, input_data, stream)

# Execute model
# NOTE(review): the bindings list assumes the engine has exactly one input
# followed by one output, with the example shapes above - verify per model.
context.execute_async_v2(
    bindings=[int(d_input), int(d_output)],
    stream_handle=stream.handle,
)

# Transfer predictions back
cuda.memcpy_dtoh_async(output_data, d_output, stream)

# Synchronize stream (output_data is only read after all queued work is done)
stream.synchronize()

# Print top 5 predictions: argsort is ascending, so take the last five
# indices and reverse them to list the highest score first.
top5 = output_data[0].argsort()[-5:][::-1]
print('Top 5 predicted class indices:', top5)
# NOTE(review): the two snippets below appear to be quiz exercises (a question
# about "the error" follows them). Only the lost line breaks and indentation
# are restored; file names and API calls are left exactly as given.

# Snippet 1: parse an ONNX file, then print a success message.
import tensorrt as trt
logger = trt.Logger()
builder = trt.Builder(logger)
network = builder.create_network()
parser = trt.OnnxParser(network, logger)
with open('missing_model.onnx', 'rb') as f:
    parser.parse(f.read())
print('Model parsed successfully')

# Snippet 2: parse an ONNX file and build an engine. `trt` and `logger` are
# not defined within this snippet itself.
builder = trt.Builder(logger)
network = builder.create_network()
parser = trt.OnnxParser(network, logger)
with open('model.onnx', 'rb') as f:
    parser.parse(f.read())
engine = builder.build_cuda_engine(network)
What is the likely cause of the error?