Where do .tflite files come from?
- Converted from TensorFlow/Keras — see Convert TensorFlow models
- Downloaded pre-quantized from Qualcomm AI Hub — see AI Hub
- Exported from Edge Impulse — see Edge Impulse
Quantizing models
The NPU only supports uint8/int8 quantized models. Unsupported models or unsupported layers are automatically moved back to the CPU. You can use quantization-aware training or post-training quantization to quantize your LiteRT models. Make sure you follow the steps for “Full integer quantization”.
Don’t want to quantize yourself? You can download a range of pre-quantized models from Qualcomm AI Hub, or use Edge Impulse to quantize new or existing models.
Running a model on the NPU (Python)
To offload a model to the NPU, you just need to load the LiteRT delegate and pass it into the interpreter. For example:
from ai_edge_litert.interpreter import Interpreter, load_delegate
# Load the QNN delegate library and select its "htp" backend.
qnn_delegate = load_delegate(
    "libQnnTFLiteDelegate.so",
    options={"backend_type": "htp"},
)
# Pass the delegate to the interpreter; model_path is left as a placeholder
# for the path of your own .tflite file.
interpreter = Interpreter(
    model_path=...,
    experimental_delegates=[qnn_delegate],
)
Running a model on the NPU (C++)
To offload a model to the NPU, you’ll first need to add the following compile flags:
CFLAGS += -I${QNN_SDK_ROOT}/include
LDFLAGS += -L${QNN_SDK_ROOT}/lib/aarch64-ubuntu-gcc9.4 -lQnnTFLiteDelegate
// == Includes ==
// Header declaring the QNN delegate options struct and create function used below.
#include "QNN/TFLiteDelegate/QnnTFLiteDelegate.h"
// == Application code ==
// Get your interpreter... (placeholder: construct it from your model as usual)
tflite::Interpreter *interpreter = ...;
// Create QNN Delegate options structure, starting from the library defaults.
TfLiteQnnDelegateOptions options = TfLiteQnnDelegateOptionsDefault();
// Set the mandatory backend_type option. All other options have default values.
options.backend_type = kHtpBackend;
// Instantiate delegate. Must not be freed until interpreter is freed.
// NOTE(review): presumably released with the matching delete function from
// the same header once the interpreter is gone — confirm against the header.
TfLiteDelegate* delegate = TfLiteQnnDelegateCreate(&options);
// Hand the graph to the delegate; the returned status reports whether that worked.
TfLiteStatus status = interpreter->ModifyGraphWithDelegate(delegate);
// Check that status == kTfLiteOk before relying on the delegate.
Python examples
Prerequisites
- Ubuntu OS should be flashed
- Terminal access with appropriate permissions
- If you haven’t previously installed the PPA packages, please run the following steps to install them: https://qualcomm-3.mintlify.io/devices/iq9075-evk/update-software/upgrade-ubuntu#4-upgrade-pre-built-packages
- Open the terminal on your development board, or an SSH session to your development board, create a new venv, and install the LiteRT runtime, Pillow, and OpenCV:
python3 -m venv .venv-litert-demo --system-site-packages
source .venv-litert-demo/bin/activate
pip3 install ai-edge-litert==1.3.0 Pillow
pip3 install opencv-python
- To prepare the development environment, install the following packages. These packages provide essential components such as GTK bindings, Python development utilities, and build tools required for compiling and running the application effectively.
sudo apt install -y python3-gi python3-gi-cairo gir1.2-gtk-3.0
sudo apt install -y build-essential python3-dev python3-pip python3-venv python3-full pkg-config meson
sudo apt install -y pkg-config cmake libcairo2-dev
sudo apt install -y libgirepository1.0-dev gir1.2-glib-2.0
- Vision Transformers
- Image Classification
- Object Detection
Vision Transformers
Here’s how you can run a Vision Transformer model (downloaded from AI Hub) on both the CPU and the NPU using the LiteRT delegates.
Create inference script
Create inference_vit.py and add the following code:
import numpy as np
from ai_edge_litert.interpreter import Interpreter, load_delegate
from PIL import Image
import os, time, sys
import urllib.request
def curr_ms():
    """Return the current time.time() value as a whole number of milliseconds."""
    seconds_now = time.time()
    return round(seconds_now * 1000)
# Run on the NPU when --use-npu is given anywhere on the command line.
use_npu = '--use-npu' in sys.argv[1:]

# Paths to the quantized TFLite model, its labels, and the test image
# (each one is downloaded automatically when it is missing)
MODEL_PATH = "vit-vit-w8a8.tflite"
IMAGE_PATH = "boa-constrictor.jpg"
LABELS_PATH = "vit-vit-labels.txt"


def _download_if_missing(path, url, description):
    """Download url to path unless a file already exists at path."""
    if not os.path.exists(path):
        print(f"Downloading {description}...")
        urllib.request.urlretrieve(url, path)


_download_if_missing(
    MODEL_PATH,
    'https://cdn.edgeimpulse.com/qc-ai-docs/models/vit-vit-w8a8.tflite',
    "model",
)
_download_if_missing(
    LABELS_PATH,
    'https://cdn.edgeimpulse.com/qc-ai-docs/models/vit-vit-labels.txt',
    "labels",
)
_download_if_missing(
    IMAGE_PATH,
    'https://cdn.edgeimpulse.com/qc-ai-docs/examples/boa-constrictor.jpg',
    "image",
)

# Read the labels, skipping blank lines; entries are later looked up by
# class index when the predictions are printed.
with open(LABELS_PATH, 'r') as f:
    labels = [line for line in f.read().splitlines() if line.strip()]
# Attach the QNN delegate only when NPU execution was requested; otherwise
# no extra delegate is passed to the interpreter.
experimental_delegates = (
    [load_delegate("libQnnTFLiteDelegate.so", options={"backend_type": "htp"})]
    if use_npu
    else []
)
# Load TFLite model and allocate tensors
interpreter = Interpreter(model_path=MODEL_PATH, experimental_delegates=experimental_delegates)
interpreter.allocate_tensors()
# Get input and output tensor details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# Load and preprocess image
def load_image(path, input_shape):
    """Load the image at path as a uint8 array with a leading batch axis.

    input_shape is unpacked as [1, height, width, channels]. The image is
    converted to RGB and resized to (width, height) before a batch axis of
    size 1 is added in front.
    """
    _batch, target_height, target_width, _channels = input_shape
    rgb_image = Image.open(path).convert("RGB")
    resized = rgb_image.resize((target_width, target_height))
    # quantized models expect uint8
    pixels = np.array(resized, dtype=np.uint8)
    return pixels[np.newaxis, ...]
input_shape = input_details[0]['shape']
input_data = load_image(IMAGE_PATH, input_shape)
# Set tensor and run inference
interpreter.set_tensor(input_details[0]['index'], input_data)
# Run once to warmup
interpreter.invoke()
# Then run 10x
start = curr_ms()
for _ in range(10):
    interpreter.invoke()
end = curr_ms()
# Get prediction
q_output = interpreter.get_tensor(output_details[0]['index'])
scale, zero_point = output_details[0]['quantization']
f_output = q_output.astype(np.float32)
# A scale of 0 marks a tensor without quantization parameters; multiplying
# by it would wipe the output, so only dequantize when a scale is present.
if scale != 0:
    f_output = (f_output - zero_point) * scale
# Image classification models in AI Hub miss a Softmax() layer at the end of the model, so add it manually
def softmax(x, axis=-1):
    """Return the softmax of x computed along axis."""
    # Shifting by the per-axis maximum keeps np.exp from overflowing.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exponentials = np.exp(shifted)
    totals = np.sum(exponentials, axis=axis, keepdims=True)
    return exponentials / totals
# show top-5 predictions
scores = softmax(f_output[0])
# Sort ascending, flip to descending, keep the first five indices.
top_k = np.argsort(scores)[::-1][:5]
print("\nTop-5 predictions:")
for class_index in top_k:
    print(f"Class {labels[class_index]}: score={scores[class_index]}")
print('')
# The timed loop above ran 10 invocations, hence the division by 10.
average_ms = (end - start) / 10
print(f'Inference took (on average): {average_ms}ms. per image')
Run on CPU
python3 inference_vit.py
# INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
#
# Top-5 predictions:
# Class boa constrictor: score=0.6264431476593018
# Class rock python: score=0.047579940408468246
# Class night snake: score=0.006721484009176493
# Class mouse: score=0.0022421202156692743
# Class pick: score=0.001942973816767335
#
# Inference took (on average): 300.8ms. per image
Run on NPU
python3 inference_vit.py --use-npu
# INFO: TfLiteQnnDelegate delegate: 1382 nodes delegated out of 1633 nodes with 27 partitions.
#
# INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
#
# Top-5 predictions:
# Class boa constrictor: score=0.6113042235374451
# Class rock python: score=0.038359832018613815
# Class night snake: score=0.011630792170763016
# Class mouse: score=0.002294909441843629
# Class lens cap: score=0.0018960189772769809
#
# Inference took (on average): 13.9ms. per image
GTK-based image classification app
Here’s how you can use a GTK-based desktop application to run an image classification model — downloaded from AI Hub — on both the CPU and NPU using LiteRT delegates from AI Engine Direct.
The GoogLeNet_w8a8.tflite model, sourced from AI Hub, leverages TensorFlow Lite with QNN delegate acceleration to enable efficient on-device inference.
| Property | Value |
|---|---|
| Type | Desktop GUI application |
| Functionality | Image classification using TFLite |
| Modes | CPU and QNN delegate |
| Interface | GTK-based GUI |
| Output | Top predictions with confidence bars |
Environment setup and imports
The script sets up display-related environment variables (for Linux systems) and imports necessary libraries like OpenCV, NumPy, GTK, and TensorFlow Lite.
import cv2, numpy as np, os, time
from gi.repository import Gtk, GLib, GdkPixbuf
import ai_edge_litert.interpreter as tflite
GTK is used for the GUI, OpenCV for image handling, and TensorFlow Lite for inference.
Configuration constants
These constants define paths to the model, label file, and delegate library.
# Local path of the GoogLeNet model; this matches the file fetched by the
# download step and the path used in the full reference code.
TF_MODEL = "/home/ubuntu/GoogLeNet_w8a8.tflite"
# Label file read by load_labels().
LABELS = "/etc/labels/imagenet_labels.txt"
# Shared library handed to tflite.load_delegate().
DELEGATE_PATH = "libQnnTFLiteDelegate.so"
DEVICE_OS = "Ubuntu"
Download the TFLite model
This script checks if a TensorFlow Lite model file exists locally, and if not, downloads it from a specified Hugging Face URL.
import urllib.request
# Download the model only when no file exists yet at TF_MODEL.
if not os.path.exists(TF_MODEL):
    model_url = (
        'https://huggingface.co/qualcomm/GoogLeNet/'
        'resolve/main/GoogLeNet_w8a8.tflite'
    )
    print("Downloading model...")
    urllib.request.urlretrieve(url=model_url, filename=TF_MODEL)
Helper functions
Softmax calculation — ensures numerical stability when converting logits to probabilities.
Label loader — loads class labels from a text file.
Image preprocessing — prepares the image for model input.
def stable_softmax(logits):
    """Return float32 probabilities for logits, normalised over all elements."""
    as_float = logits.astype(np.float32)
    # Shift so the largest entry becomes 0, then bound the exponent range.
    bounded = np.clip(as_float - np.max(as_float), -500, 500)
    weights = np.exp(bounded)
    return weights / np.sum(weights)
def load_labels(label_path):
    """Return every line of label_path with surrounding whitespace stripped."""
    with open(label_path, 'r') as label_file:
        return list(map(str.strip, label_file))
def preprocess_image(image_path, input_shape, input_dtype):
    """Load an image file and shape it into a single-item model input batch.

    Args:
        image_path: Path of the image file to read.
        input_shape: Model input shape; index 1 is used as the target
            height and index 2 as the target width.
        input_dtype: dtype the pixel data is cast to.

    Returns:
        The RGB image, resized and cast, with a leading batch axis of size 1.

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports an unreadable file by returning None, not by raising.
        raise ValueError(f"Failed to load image at {image_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (input_shape[2], input_shape[1]))
    img = img.astype(input_dtype)
    return np.expand_dims(img, axis=0)
Inference execution
This function loads the model (with or without delegate), prepares input, runs inference, applies softmax, and returns top 4 predictions with confidence scores.
def runInference(image, use_delegate):
    """Classify one image file and return the top predictions with timing.

    Args:
        image: Path of the image file to classify.
        use_delegate: When true, try to run through the QNN delegate with
            the "htp" backend; if that fails, fall back to the plain
            interpreter.

    Returns:
        (results, inference_time): results is a list of up to four
        (label, percent) tuples, highest score first; inference_time is the
        duration of invoke() in seconds.
    """
    model = None
    if use_delegate:
        try:
            delegate = tflite.load_delegate(DELEGATE_PATH, {'backend_type': 'htp'})
            model = tflite.Interpreter(model_path=TF_MODEL, experimental_delegates=[delegate])
        except Exception as e:
            # Report why the delegate was skipped instead of hiding it.
            print(f"WARNING: Failed to load QNN delegate: {e}")
    if model is None:
        model = tflite.Interpreter(model_path=TF_MODEL)
    model.allocate_tensors()

    input_details = model.get_input_details()
    input_data = preprocess_image(image, input_details[0]['shape'], input_details[0]['dtype'])
    model.set_tensor(input_details[0]['index'], input_data)

    start_time = time.time()
    model.invoke()
    inference_time = time.time() - start_time

    output_details = model.get_output_details()
    output_data = model.get_tensor(output_details[0]['index'])
    logits = output_data[0].astype(np.float32)
    # Convert quantized integers back to real-valued logits before softmax;
    # a scale of 0 means the tensor carries no quantization parameters.
    scale, zero_point = output_details[0]['quantization']
    if scale != 0:
        logits = (logits - zero_point) * scale
    probabilities = stable_softmax(logits)

    labels = load_labels(LABELS)
    top_indices = np.argsort(probabilities)[::-1][:4]
    results = [(labels[i], probabilities[i] * 100) for i in top_indices]
    return results, inference_time
GTK GUI and entry point
The GUI includes an image display area, radio buttons to choose CPU or delegate, buttons to select and reprocess images, and result display with labels and progress bars.
class FileBrowser(Gtk.FileChooserDialog):
    """File chooser titled "Choose an image" with Cancel and Open buttons."""

    def __init__(self):
        super().__init__(title="Choose an image", action=Gtk.FileChooserAction.OPEN)
        self.add_buttons(Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL, Gtk.STOCK_OPEN, Gtk.ResponseType.OK)

    def run_and_get_file(self):
        """Run the dialog and return the chosen path, or None if not confirmed.

        The dialog is destroyed on every path, including after a selection.
        """
        try:
            if self.run() == Gtk.ResponseType.OK:
                return self.get_filename()
            return None
        finally:
            self.destroy()
def main():
    """Create the main window, show it, and run the GTK main loop."""
    app = MainWindow()
    # Closing the window ends the main loop.
    app.connect("destroy", Gtk.main_quit)
    app.show_all()
    Gtk.main()


if __name__ == "__main__":
    success, _ = Gtk.init_check()
    if not success:
        print("GTK could not be initialized.")
        # raise SystemExit rather than calling exit(), which only exists
        # when the site module has been loaded.
        raise SystemExit(1)
    main()
Run the application
python3 classification.py
Download any image from the internet; in this example we used a fire truck image. Use the scp command to copy the image onto the device:
scp xxx.jpg ubuntu@IP_address:/home/ubuntu/
Select delegate as your runtime option on the GUI:
Full reference code
Full reference code
# -----------------------------------------------------------------------------
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
#
# -----------------------------------------------------------------------------
import cv2
import gi
import numpy as np
import os
# Display-related environment variables, assigned ahead of the Gtk import
# that follows. The variable name is XDG_RUNTIME_DIR (all upper case).
os.environ['XDG_RUNTIME_DIR'] = '/run/user/1000/'
os.environ['WAYLAND_DISPLAY'] = 'wayland-1'
os.environ['DISPLAY'] = ':0'
import time
import urllib.request
gi.require_version("Gtk", "3.0")
from gi.repository import Gtk, GLib, GdkPixbuf
# ========= Constants =========
# Local path of the model file; it is downloaded below when missing.
TF_MODEL = "/home/ubuntu/GoogLeNet_w8a8.tflite"
# Label file, read line by line by load_labels().
LABELS = "/etc/labels/imagenet_labels.txt"
# Shared library passed to tflite.load_delegate().
DELEGATE_PATH = "libQnnTFLiteDelegate.so"
# Only used in the status line printed by runInference().
DEVICE_OS="Ubuntu"
# Node name reported by os.uname(); not referenced elsewhere in the visible code.
UNAME = os.uname().nodename
import ai_edge_litert.interpreter as tflite
# Download the model only when no file exists yet at TF_MODEL.
if not os.path.exists(TF_MODEL):
    model_url = (
        'https://huggingface.co/qualcomm/GoogLeNet/'
        'resolve/main/GoogLeNet_w8a8.tflite'
    )
    print("Downloading model...")
    urllib.request.urlretrieve(url=model_url, filename=TF_MODEL)
# ========= Helper Functions =========
def stable_softmax(logits):
    """Return float32 probabilities for logits, normalised over all elements."""
    values = logits.astype(np.float32)
    # Subtract the maximum so the largest exponent is 0, then bound the range.
    peak = np.max(values)
    exponentials = np.exp(np.clip(values - peak, -500, 500))
    return exponentials / np.sum(exponentials)
def load_labels(label_path):
    """Return every line of label_path with surrounding whitespace stripped."""
    with open(label_path, 'r') as label_file:
        return list(map(str.strip, label_file))
def resizeImage(pixbuf, max_width=800, max_height=600):
    """Return the (width, height) that fits pixbuf inside the given limits.

    The aspect ratio is preserved by applying one scale factor to both
    sides; images smaller than the limits are scaled up to fill them.

    Args:
        pixbuf: Object exposing get_width() and get_height().
        max_width: Largest allowed width in pixels.
        max_height: Largest allowed height in pixels.

    Returns:
        A (new_width, new_height) tuple of ints, each at least 1.
    """
    original_width = pixbuf.get_width()
    original_height = pixbuf.get_height()
    scale = min(max_width / original_width, max_height / original_height)
    # int() truncates, so a very thin image could otherwise get a 0 dimension.
    new_width = max(1, int(original_width * scale))
    new_height = max(1, int(original_height * scale))
    return new_width, new_height
def preprocess_image(image_path, input_shape, input_dtype):
    """Read an image file and return it as a one-item batch for the model.

    The image is converted from BGR to RGB, resized to
    (input_shape[2], input_shape[1]), cast to input_dtype, and given a
    leading batch axis. Raises ValueError when the file cannot be loaded.
    """
    bgr_pixels = cv2.imread(image_path)
    if bgr_pixels is None:
        raise ValueError(f"Failed to load image at {image_path}")
    # cv2.resize takes (width, height); assumes input_shape is
    # [batch, height, width, channels] — TODO confirm for other models.
    target_size = (input_shape[2], input_shape[1])
    rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb_pixels, target_size)
    return np.expand_dims(resized.astype(input_dtype), axis=0)
# ====== Inference Function ======
def runInference(image, use_delegate):
    """Classify one image file with the TFLite model.

    Args:
        image: Path of the image file to classify.
        use_delegate: When true, try to run through the QNN delegate with
            the "htp" backend; if loading it fails, continue with the plain
            interpreter.

    Returns:
        (results, inference_time): results is a list of up to four
        (label, percent) tuples, highest score first; inference_time is the
        duration of invoke() in seconds. If invoke() raises, ([], 0.0) is
        returned so callers can always unpack two values.
    """
    print(f"Running on {DEVICE_OS} using Delegate:{use_delegate}")
    model = None
    if use_delegate:
        try:
            delegate_options = {'backend_type': 'htp'}
            delegate = tflite.load_delegate(DELEGATE_PATH, delegate_options)
            model = tflite.Interpreter(model_path=TF_MODEL, experimental_delegates=[delegate])
            print("INFO: Loaded QNN delegate with HTP backend")
        except Exception as e:
            print(f"WARNING: Failed to load QNN delegate: {e}")
            print("INFO: Continuing without QNN delegate")
    if model is None:
        model = tflite.Interpreter(model_path=TF_MODEL)
    model.allocate_tensors()

    input_details = model.get_input_details()
    input_shape = input_details[0]['shape']
    input_dtype = input_details[0]['dtype']
    input_data = preprocess_image(image, input_shape, input_dtype)
    model.set_tensor(input_details[0]['index'], input_data)

    try:
        start_time = time.time()
        model.invoke()
        inference_time = time.time() - start_time
        print("Interpreter invoked successfully.")
    except Exception as e:
        print(f"Error during model invocation: {e}")
        # Same two-value shape as the success path, so unpacking never fails.
        return [], 0.0

    output_details = model.get_output_details()
    output_data = model.get_tensor(output_details[0]['index'])
    logits = output_data[0].astype(np.float32)
    # Convert quantized integers back to real-valued logits before softmax;
    # a scale of 0 means the tensor carries no quantization parameters.
    scale, zero_point = output_details[0]['quantization']
    if scale != 0:
        logits = (logits - zero_point) * scale

    labels = load_labels(LABELS)
    predicted_index = np.argmax(logits)
    print("Predicted index:", predicted_index)
    print("Predicted label:", labels[predicted_index])

    probabilities = stable_softmax(logits)
    top_k = 4
    top_indices = np.argsort(probabilities)[::-1][:top_k]
    results = [(labels[i], probabilities[i] * 100) for i in top_indices]
    return results, inference_time
# ====== GTK GUI Classes ======
class FileBrowser(Gtk.FileChooserDialog):
    """File chooser titled "Choose an image" with Cancel and Open buttons."""

    def __init__(self):
        super().__init__(title="Choose an image", action=Gtk.FileChooserAction.OPEN)
        self.add_buttons(Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL, Gtk.STOCK_OPEN, Gtk.ResponseType.OK)
        # Stays None unless the user confirms a file, so the return value of
        # run_and_get_file() is defined when the dialog is cancelled.
        self.selected_file = None

    def run_and_get_file(self):
        """Run the dialog, destroy it, and return the chosen path or None."""
        response = super().run()
        if response == Gtk.ResponseType.OK:
            self.selected_file = self.get_filename()
            print("Selected file:", self.selected_file)
        self.destroy()
        return self.selected_file
class MainWindow(Gtk.Window):
    """Main window: an image preview followed by a column of controls and results."""

    def __init__(self):
        super().__init__(title="Image Classification")
        self.set_default_size(800, 600)
        # Path of the most recently selected image; empty until one is chosen.
        self.imageFilepath = ""

        self.mainBox = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=10)
        self.mainBox.set_margin_top(10)
        self.mainBox.set_margin_bottom(10)
        self.mainBox.set_margin_start(10)
        self.mainBox.set_margin_end(10)
        self.add(self.mainBox)

        self.image = Gtk.Image()
        MAIN_IMAGE = "MainWindowPic.jpg"
        # Gtk.Image.set_from_file does not raise for a missing file, so check
        # for the file explicitly to reach the fallback icon.
        if os.path.exists(MAIN_IMAGE):
            self.image.set_from_file(MAIN_IMAGE)
        else:
            print("Error loading main image:", f"{MAIN_IMAGE} not found")
            self.image.set_from_icon_name("image-missing", Gtk.IconSize.DIALOG)
        self.mainBox.pack_start(self.image, True, True, 0)

        self.infoBox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
        delegate_label = Gtk.Label(label="Select Inference Mode:")
        self.infoBox.pack_start(delegate_label, False, False, 10)

        # Two radio buttons in one group: CPU or Delegate.
        self.cpu_radio = Gtk.RadioButton.new_with_label_from_widget(None, "CPU")
        self.delegate_radio = Gtk.RadioButton.new_with_label_from_widget(self.cpu_radio, "Delegate")
        self.infoBox.pack_start(self.cpu_radio, False, False, 0)
        self.infoBox.pack_start(self.delegate_radio, False, False, 0)
        self.cpu_radio.connect("toggled", self.on_radio_toggled)
        self.delegate_radio.connect("toggled", self.on_radio_toggled)

        open_button = Gtk.Button(label="Select Image")
        open_button.connect("clicked", self.on_open_file_clicked)
        self.infoBox.pack_start(open_button, False, True, 10)

        reprocess_button = Gtk.Button(label="Reprocess Image")
        reprocess_button.connect("clicked", self.on_reprocess_image_clicked)
        self.infoBox.pack_start(reprocess_button, False, True, 10)

        # Container that process_file() fills with one row per prediction.
        self.results = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
        self.infoBox.pack_start(self.results, True, True, 0)
        self.mainBox.pack_start(self.infoBox, True, True, 0)

    def use_delegate(self):
        """Return True when the "Delegate" radio button is active."""
        return self.delegate_radio.get_active()

    def on_radio_toggled(self, button):
        """Log the label of the radio button that just became active."""
        if button.get_active():
            print(f"Selected option: {button.get_label()}")

    def process_file(self, filepath):
        """Show the image at filepath, run inference, and display the results."""
        try:
            pixbuf = GdkPixbuf.Pixbuf.new_from_file(filepath)
            new_width, new_height = resizeImage(pixbuf)
            scaled_pixbuf = pixbuf.scale_simple(new_width, new_height, GdkPixbuf.InterpType.BILINEAR)
            self.image.set_from_pixbuf(scaled_pixbuf)

            use_delegate = self.use_delegate()
            print("delegate: ", use_delegate)
            outcome = runInference(filepath, use_delegate)

            # Drop rows from a previous run before showing anything new.
            for child in self.results.get_children():
                self.results.remove(child)

            # runInference signals failure with an empty result, so check
            # before unpacking the (results, inference_time) pair.
            if not outcome or not outcome[0]:
                print("Inference produced no results.")
                return
            options, inference_time = outcome

            for label, percent in options:
                textBox = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=10)
                barBox = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=10)
                text = Gtk.Label(label=label, xalign=0)
                text.set_size_request(100, -1)
                bar = Gtk.ProgressBar()
                bar.set_fraction(percent / 100.0)
                bar.set_text(f"{percent:.2f}%")
                bar.set_show_text(True)
                textBox.pack_start(text, False, False, 0)
                barBox.pack_start(bar, True, True, 0)
                self.results.pack_start(textBox, False, False, 0)
                self.results.pack_start(barBox, False, False, 0)

            time_label = Gtk.Label(label=f"Inference Time : {inference_time * 1000:.2f} ms")
            self.results.pack_start(time_label, False, False, 50)
            self.results.show_all()
        except Exception as e:
            print("Error reading file:", e)

    def on_open_file_clicked(self, widget):
        """Let the user pick an image and process it if one was chosen."""
        dialog = FileBrowser()
        selected_file = dialog.run_and_get_file()
        if selected_file:
            # Only remember a real selection, so cancelling keeps the
            # previous image available for "Reprocess Image".
            self.imageFilepath = selected_file
            self.process_file(selected_file)

    def on_reprocess_image_clicked(self, widget):
        """Re-run inference on the last selected image, if there is one."""
        if not self.imageFilepath:
            print("No image selected yet.")
            return
        self.process_file(self.imageFilepath)

    def on_destroy(self, widget):
        """Quit the GTK main loop."""
        Gtk.main_quit()
# === Main Entry Point ===
def main():
    """Create the main window, show it, and run the GTK main loop."""
    app = MainWindow()
    # Closing the window ends the main loop.
    app.connect("destroy", Gtk.main_quit)
    app.show_all()
    Gtk.main()


if __name__ == "__main__":
    success, _ = Gtk.init_check()
    if not success:
        print("GTK could not be initialized. Check environmental variables")
        # raise SystemExit rather than calling exit(), which only exists
        # when the site module has been loaded.
        raise SystemExit(1)
    main()
Object detection with OpenCV and Wayland display
This Python script performs real-time object detection on a video file using a quantized YOLOv8 TensorFlow Lite model and displays the annotated frames via GStreamer on a Wayland display. It is optimized for edge AI scenarios using hardware acceleration through the QNN TFLite delegate.
The YOLOv8 model isn’t available by default. Please follow Step 6 from Qualcomm Intelligent Multimedia SDK to export the YOLOv8 quantized model, then copy it to the device:
scp xxxx.tflite ubuntu@IP_address:/home/ubuntu/
Configuration
Paths for the model, labels, input video, and delegate are defined. Constants like frame dimensions, FPS, confidence threshold, and scaling factors are set for preprocessing and postprocessing.
Use a video file that works well with object detection models. For best results, choose videos with clear subjects, good lighting, and minimal motion blur (e.g. street scenes, warehouse floors, static camera feeds).
| Parameter | Value |
|---|---|
| MODEL_PATH | yolov8_det_quantized.tflite |
| LABEL_PATH | coco_labels.txt |
| VIDEO_IN | video.mp4 |
| DELEGATE_PATH | libQnnTFLiteDelegate.so |
Model loading and delegate setup
Loads the hardware delegate for accelerated inference and initializes the TensorFlow Lite interpreter with the quantized YOLOv8 model.
# Load the QNN delegate with the "htp" backend and attach it to the interpreter.
delegate = tflite.load_delegate(DELEGATE_PATH, {'backend_type': 'htp'})
interpreter = tflite.Interpreter(
    model_path=MODEL_PATH,
    experimental_delegates=[delegate],
)
interpreter.allocate_tensors()
GStreamer pipeline setup
Creates a GStreamer pipeline using appsrc to stream frames to a Wayland sink for real-time display of processed frames.
# Build the display pipeline: appsrc (named "src") -> videoconvert -> waylandsink.
# The caps string declares the frames pushed into appsrc as raw BGR,
# 1600x900, at 30/1 frames per second.
pipeline = Gst.parse_launch(
'appsrc name=src is-live=true block=true format=time '
'caps=video/x-raw,format=BGR,width=1600,height=900,framerate=30/1 '
'! videoconvert ! waylandsink')
Inference and post-processing
Runs inference on each frame, dequantizes outputs, applies a confidence threshold, and uses Non-Maximum Suppression (NMS) to remove overlapping boxes.
interpreter.set_tensor(in_det[0]['index'], input_tensor)
interpreter.invoke()
# Raw (quantized) outputs for the single image in the batch
boxes_q = interpreter.get_tensor(out_det[0]['index'])[0]
scores_q = interpreter.get_tensor(out_det[1]['index'])[0]
classes_q = interpreter.get_tensor(out_det[2]['index'])[0]
# Dequantize, as the full reference code does, so that boxes, scores and
# classes exist before they are filtered
boxes = BOX_SCALE * (boxes_q.astype(np.float32) - BOX_ZP)
scores = SCORE_SCALE * scores_q.astype(np.float32)
classes = classes_q.astype(np.int32)
# Filter by confidence threshold
mask = scores >= CONF_THRES
boxes_f = boxes[mask]
scores_f = scores[mask]
classes_f = classes[mask]
Annotation and display
Draws bounding boxes and labels on the frame using OpenCV, then streams to the Wayland display.
# Outline the detection in green (0,255,0) with a thickness of 2.
cv2.rectangle(frame_rs, (x1i, y1i), (x2i, y2i), (0,255,0), 2)
# Write "<label> <score>" 5 px above the box; max(10, ...) keeps the text's
# y coordinate at 10 or more.
cv2.putText(frame_rs, f"{lab} {sc:.2f}", (x1i, max(10,y1i-5)),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 2)
Full reference code
Full reference code
# -----------------------------------------------------------------------------
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
#
# -----------------------------------------------------------------------------
#!/usr/bin/env python3
import cv2
import numpy as np
import gi
gi.require_version('Gst', '1.0')
from gi.repository import Gst
import ai_edge_litert.interpreter as tflite
Gst.init(None)
# -------------------- Parameters --------------------
# NOTE(review): the surrounding guide refers to a YOLOv8 model named
# yolov8_det_quantized.tflite — confirm this path matches the exported file.
MODEL_PATH = "/etc/models/yolox_quantized.tflite"
LABEL_PATH = "/etc/labels/coco_labels.txt"
VIDEO_IN = "/etc/media/video.mp4"
DELEGATE_PATH = "libQnnTFLiteDelegate.so"
# Size and rate of the frames pushed to the display pipeline.
FRAME_W, FRAME_H = 1600, 900
FPS_OUT = 30
# Detections scoring below CONF_THRES are dropped; NMS_IOU_THRES is the
# overlap threshold passed to non-maximum suppression.
CONF_THRES = 0.25
NMS_IOU_THRES = 0.50
# Constants used to dequantize the raw box and score outputs.
# NOTE(review): presumably taken from this model's output quantization
# parameters — re-check them if the model changes.
BOX_SCALE = 3.2108588218688965
BOX_ZP = 31.0
SCORE_SCALE = 0.0038042240776121616
# -------------------- Load Model --------------------
delegate_options = { 'backend_type': 'htp' }
delegate = tflite.load_delegate(DELEGATE_PATH, delegate_options)
interpreter = tflite.Interpreter(model_path=MODEL_PATH, experimental_delegates=[delegate])
interpreter.allocate_tensors()
in_det = interpreter.get_input_details()
out_det = interpreter.get_output_details()
# Model input height and width, taken from axes 1 and 2 of the input shape.
in_h, in_w = in_det[0]["shape"][1:3]
# -------------------- Load Labels --------------------
# Use a context manager so the label file is closed after reading.
with open(LABEL_PATH) as label_file:
    labels = [line.strip() for line in label_file]
# -------------------- GStreamer Pipeline --------------------
# The caps are built from FRAME_W / FRAME_H / FPS_OUT so the declared format
# always matches the buffers pushed from frame_rs.
pipeline = Gst.parse_launch(
    'appsrc name=src is-live=true block=true format=time '
    f'caps=video/x-raw,format=BGR,width={FRAME_W},height={FRAME_H},framerate={FPS_OUT}/1 '
    '! videoconvert ! waylandsink'
)
appsrc = pipeline.get_by_name('src')
pipeline.set_state(Gst.State.PLAYING)
# -------------------- Video Input --------------------
cap = cv2.VideoCapture(VIDEO_IN)
if not cap.isOpened():
    # Fail with a clear message instead of silently streaming zero frames.
    pipeline.set_state(Gst.State.NULL)
    raise SystemExit(f"Could not open video: {VIDEO_IN}")
# Factors that map model-input coordinates onto the display frame.
sx, sy = FRAME_W / in_w, FRAME_H / in_h
# Buffers allocated once and reused for every frame.
frame_rs = np.empty((FRAME_H, FRAME_W, 3), np.uint8)
input_tensor = np.empty((1, in_h, in_w, 3), np.uint8)
frame_cnt = 0
# -------------------- Main Loop --------------------
# Per frame: resize, run inference, dequantize, filter, apply NMS, draw, and
# push the annotated frame to the display pipeline.
while True:
    ok, frame = cap.read()
    if not ok:
        break
    frame_cnt += 1

    # Resize into the preallocated display frame, then into the model input.
    # NOTE(review): frames stay in OpenCV's BGR order here — confirm the
    # model does not expect RGB input.
    cv2.resize(frame, (FRAME_W, FRAME_H), dst=frame_rs)
    cv2.resize(frame_rs, (in_w, in_h), dst=input_tensor[0])

    interpreter.set_tensor(in_det[0]['index'], input_tensor)
    interpreter.invoke()
    boxes_q = interpreter.get_tensor(out_det[0]['index'])[0]
    scores_q = interpreter.get_tensor(out_det[1]['index'])[0]
    classes_q = interpreter.get_tensor(out_det[2]['index'])[0]

    # Dequantize the raw outputs.
    boxes = BOX_SCALE * (boxes_q.astype(np.float32) - BOX_ZP)
    scores = SCORE_SCALE * scores_q.astype(np.float32)
    classes = classes_q.astype(np.int32)

    mask = scores >= CONF_THRES
    if np.any(mask):
        boxes_f = boxes[mask]
        scores_f = scores[mask]
        classes_f = classes[mask]

        # NMSBoxes takes (x, y, width, height) rather than corner pairs.
        x1, y1, x2, y2 = boxes_f.T
        boxes_cv2 = np.column_stack((x1, y1, x2 - x1, y2 - y1))
        idx_cv2 = cv2.dnn.NMSBoxes(
            bboxes=boxes_cv2.tolist(),
            scores=scores_f.tolist(),
            score_threshold=CONF_THRES,
            nms_threshold=NMS_IOU_THRES
        )
        if len(idx_cv2):
            # asarray + reshape gives a flat index array whether NMSBoxes
            # returned an ndarray or a plain sequence.
            idx = np.asarray(idx_cv2).reshape(-1)
            sel_boxes = boxes_f[idx]
            sel_scores = scores_f[idx]
            sel_classes = classes_f[idx]
            if frame_cnt % 100 == 0:
                print(f"[{frame_cnt:4d}] max score = {sel_scores.max():.3f}")

            # Scale to display size, then clamp to the frame.
            sel_boxes[:, [0,2]] *= sx
            sel_boxes[:, [1,3]] *= sy
            sel_boxes = sel_boxes.astype(np.int32)
            sel_boxes[:, [0,2]] = np.clip(sel_boxes[:, [0,2]], 0, FRAME_W-1)
            sel_boxes[:, [1,3]] = np.clip(sel_boxes[:, [1,3]], 0, FRAME_H-1)

            for (x1i, y1i, x2i, y2i), sc, cl in zip(sel_boxes, sel_scores, sel_classes):
                cv2.rectangle(frame_rs, (x1i, y1i), (x2i, y2i), (0,255,0), 2)
                # Fall back to the numeric id when the label list is too short.
                lab = labels[cl] if cl < len(labels) else str(cl)
                cv2.putText(frame_rs, f"{lab} {sc:.2f}", (x1i, max(10,y1i-5)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 2)

    data = frame_rs.tobytes()
    buf = Gst.Buffer.new_allocate(None, len(data), None)
    buf.fill(0, data)
    buf.duration = Gst.util_uint64_scale_int(1, Gst.SECOND, FPS_OUT)
    # Timestamp the buffer with the capture's current position.
    timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) * Gst.MSECOND
    buf.pts = buf.dts = int(timestamp)

    # Stop feeding frames once the pipeline refuses them (for example after
    # an error downstream) instead of pushing into a dead pipeline.
    flow = appsrc.emit('push-buffer', buf)
    if flow != Gst.FlowReturn.OK:
        print(f"Stopping: push-buffer returned {flow}")
        break
# -------------------- Finish --------------------
appsrc.emit('end-of-stream')
# Wait (at most 5 seconds) for the end-of-stream or an error to reach the
# bus, so frames still queued are shown before the pipeline is torn down.
bus = pipeline.get_bus()
bus.timed_pop_filtered(5 * Gst.SECOND, Gst.MessageType.EOS | Gst.MessageType.ERROR)
pipeline.set_state(Gst.State.NULL)
cap.release()
print("Done – video streamed to Wayland sink")


