import os, sys, time, urllib.request, numpy as np, onnxruntime as ort
from PIL import Image
# Run on the NPU only when `--use-npu` is the first command-line argument (default: CPU)
use_npu = len(sys.argv) >= 2 and sys.argv[1] == '--use-npu'
def download_file_if_not_exists(path, url):
    """Download *url* to *path* unless *path* already exists, and return *path*.

    Args:
        path: Destination file path. Missing parent directories are created.
        url: URL to fetch when the destination does not exist yet.

    Returns:
        The *path* argument, unchanged.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises when the download fails;
        in that case no file is left at *path*.
    """
    if os.path.exists(path):
        return path

    # A bare filename has an empty dirname, and os.makedirs('') raises.
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    print(f"Downloading {path} from {url}...")

    # Download to a temporary sibling and rename it into place afterwards, so an
    # interrupted download never leaves a truncated file that later runs would
    # mistake for a complete one.
    tmp_path = f"{os.fspath(path)}.part"
    try:
        urllib.request.urlretrieve(url, tmp_path)
        os.replace(tmp_path, path)
    finally:
        # Only still present when the download or the rename failed
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return path
# Paths to your model/labels/test image (downloaded automatically when missing)
MODEL_PATH = download_file_if_not_exists(
    'models/squeezenet-1.1/model.onnx',
    'https://cdn.edgeimpulse.com/qc-ai-docs/models/SqueezeNet-1.1_w8a8.onnx',
)
# Not referenced by name in the code shown here; it is stored next to model.onnx,
# presumably because the ONNX file keeps its weights in this external data file - TODO confirm.
MODEL_DATA_PATH = download_file_if_not_exists(
    'models/squeezenet-1.1/model.data',
    'https://cdn.edgeimpulse.com/qc-ai-docs/models/SqueezeNet-1.1_w8a8.data',
)
LABELS_PATH = download_file_if_not_exists(
    'models/squeezenet-1.1_labels.txt',
    'https://cdn.edgeimpulse.com/qc-ai-docs/models/SqueezeNet-1.1_labels.txt',
)
IMAGE_PATH = download_file_if_not_exists(
    'images/boa-constrictor.jpg',
    'https://cdn.edgeimpulse.com/qc-ai-docs/examples/boa-constrictor.jpg',
)

# Parse labels: one label per line, blank lines skipped.
# The encoding is explicit so the result does not depend on the platform's locale.
with open(LABELS_PATH, 'r', encoding='utf-8') as f:
    labels = [line for line in f.read().splitlines() if line.strip()]
# Pick the execution provider: QNN with its HTP backend (the NPU) when --use-npu
# was passed in, otherwise the CPU provider.
if use_npu:
    providers = [("QNNExecutionProvider", {"backend_type": "htp"})]
else:
    providers = ["CPUExecutionProvider"]

so = ort.SessionOptions()
sess = ort.InferenceSession(MODEL_PATH, sess_options=so, providers=providers)

# Show which providers the session actually loaded
actual_providers = sess.get_providers()
print(f"Using providers: {actual_providers}")

# Input/output descriptors of the loaded model
inputs = sess.get_inputs()
outputs = sess.get_outputs()
# !! Input quantization parameters. They are hard-coded here (the author believes they
# cannot be read from the ONNX model) - update these if you use another model.
dtype = np.uint8
zero_point = 0
scale = 1 / 255
# Load an image and turn it into the model's input tensor (preprocess + quantize)
def load_image_for_onnx(path, H, W):
    """Load the image at *path* and return it as a batched CHW input tensor.

    The image is converted to RGB, resized to W x H, scaled to 0..1, transposed
    from HWC to CHW and given a leading batch dimension. When the module-level
    ``dtype`` is float32 that tensor is returned as-is; for uint8/int8 it is
    quantized with the module-level ``scale`` and ``zero_point`` first.

    Raises:
        Exception: if ``dtype`` is not float32, uint8 or int8.
    """
    rgb = Image.open(path).convert("RGB")
    pixels = np.array(rgb.resize((W, H)), dtype=np.float32)

    # !! Normalization depends on your model: this one takes plain 0..1 scaling
    pixels = pixels / 255

    # HWC -> CHW, then prepend the batch dimension
    tensor = np.expand_dims(np.transpose(pixels, (2, 0, 1)), 0)

    # Float models take the tensor without quantization
    if dtype == np.float32:
        return tensor

    # Pick the integer range and output type for the quantized tensor
    if dtype == np.uint8:
        lo, hi, target = 0, 255, np.uint8
    elif dtype == np.int8:
        # zero_point is commonly ~0 here (symmetric), but the provided value is used anyway
        lo, hi, target = -128, 127, np.int8
    else:
        raise Exception('Unexpected dtype: ' + str(dtype))

    # q = round(x / scale + zp), clipped to the representable range
    quantized = np.round(tensor / scale + zero_point)
    return np.clip(quantized, lo, hi).astype(target)
# Preprocess the test image into the model's input tensor (quantized when dtype is an integer type)
input_data = load_image_for_onnx(path=IMAGE_PATH, H=224, W=224)

# Build the feed dict once, so the timed loop below measures sess.run() only and
# not repeated input-metadata lookups and dict construction.
feed = {sess.get_inputs()[0].name: input_data}

# Warmup once
_ = sess.run(None, feed)

# Run 10x so we can calculate avg. runtime per inference
start = time.perf_counter()
for _ in range(10):
    out = sess.run(None, feed)
end = time.perf_counter()
# Image classification models in AI Hub miss a Softmax() layer at the end of the model, so add it manually
def softmax(x, axis=-1):
    """Return the softmax of *x* along *axis*.

    Args:
        x: Array-like of logits.
        axis: Axis along which the results sum to 1 (default: the last axis).

    Returns:
        Array with the same shape as *x*. Floating-point inputs keep their dtype;
        any other dtype (e.g. integer output of a quantized model) is computed in
        float64. Integer values are used as-is, they are NOT dequantized here.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # With unsigned or narrow integer dtypes `x - x_max` wraps around and
        # np.exp() overflows, which yields inf/NaN scores; do the math in floats.
        x = x.astype(np.float64)
    # subtract max for numerical stability
    x_max = np.max(x, axis=axis, keepdims=True)
    e_x = np.exp(x - x_max)
    return e_x / np.sum(e_x, axis=axis, keepdims=True)
# Convert the output of the last run into scores (batch dimension dropped)
scores = softmax(np.squeeze(out[0], axis=0))

# Take top 5: indices ordered from highest to lowest score
top_k_idx = np.argsort(scores)[::-1][:5]

print("\nTop-5 predictions:")
for idx in top_k_idx:
    # Fall back to a generic name when the labels list has no entry for this index
    if idx < len(labels):
        label = labels[idx]
    else:
        label = f"Class {idx}"
    print(f"{label}: score={scores[idx]}")
print("")

# Elapsed time of the 10 timed runs, as milliseconds per run
avg_ms = ((end - start) * 1000) / 10
print(f'Inference took (on average): {avg_ms:.4g}ms. per image')