【“星睿O6”AI PC开发套件评测】人脸检测
本文介绍了瑞莎星睿 O6 (Radxa Orion O6) 开发板结合 Haar 级联和 YuNet 模型实现人脸检测的项目设计,包括板端推理和动态检测,并通过 cix 格式的预训练 FaceNet 模型文件实现板端 NPU 推理。
项目介绍
- 板端推理:结合 Haar 级联人脸检测模型,实现本地图片的推理;
- YuNet 模型:结合 YuNet 模型实现人脸检测和置信度标注;
- 动态检测:通过 USB 摄像头实现动态帧画面的人脸检测;
- NPU 推理:利用板载 NPU 资源实现基于 FaceNet 模型的人脸关键点检测。
板端推理
结合相关人脸检测模型,实现本地图片的推理,主要包括部署OpenCV等所需库和软件包、下载预训练文件等准备工作、流程图、关键代码、效果演示等。
准备工作
- 终端执行指令
apt install python3-opencv
安装 OpenCV 库;
- 安装进程结束后,执行指令
python3 -c "import cv2; print(cv2.__version__)"
获取 OpenCV 版本号并检验是否安装完成;
- 下载 OpenCV 人脸检测预训练文件 haarcascade_frontalface_default.xml ,以便后续推理时调用;
流程图
代码
执行 touch fd_xml.py
指令新建文件,并使用 nano 文本编辑器添加如下代码
import cv2
from pathlib import Path
def detect_faces(image_path: str,
                 max_side: int = 1280,
                 padding: float = 0.05,
                 cascade_path: str = "haarcascade_frontalface_default.xml") -> None:
    """
    Detect faces with a Haar cascade and show the annotated image in a window.

    Detection runs on a copy that is shrunk (never enlarged) so that its
    longest side is at most ``max_side``.  Every hit is mapped back to the
    original resolution, grown by ``padding`` on each side, clipped to the
    image and drawn as a green rectangle.  The window stays open until
    'q' or ESC is pressed, or until the window itself is closed.

    :param image_path: path of the image to analyse
    :param max_side: upper bound for the longest side during detection
                     (larger is slower, smaller may miss faces)
    :param padding: margin added on each side of a box, as a fraction of the
                    box size (0.05 = 5 %)
    :param cascade_path: path of the Haar cascade XML file
    :raises FileNotFoundError: if the image or the cascade file cannot be loaded
    """
    # 1. Load the image
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(image_path)
    h0, w0 = img.shape[:2]

    # 2. Shrink proportionally for detection (never enlarge)
    scale = min(1.0, max_side / max(h0, w0))
    if scale < 1.0:
        small_size = (max(1, int(w0 * scale)), max(1, int(h0 * scale)))
        img_small = cv2.resize(img, small_size, interpolation=cv2.INTER_LINEAR)
    else:
        img_small = img

    # 3. Grayscale + detection
    face_cascade = cv2.CascadeClassifier(str(cascade_path))
    if face_cascade.empty():
        # Fail early with the offending path instead of erroring inside
        # detectMultiScale.
        raise FileNotFoundError(cascade_path)
    gray = cv2.cvtColor(img_small, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(
        gray,
        scaleFactor=1.1,
        minNeighbors=7,
        minSize=(60, 60)
    )

    # 4. Map back to the original image + pad + clip
    for (x, y, w, h) in faces:
        # Back to original-resolution coordinates
        x = int(x / scale)
        y = int(y / scale)
        w = int(w / scale)
        h = int(h / scale)
        # Outward margin
        dw = int(w * padding)
        dh = int(h * padding)
        # Both corners are derived from the unclamped box, so clamping the
        # top-left corner cannot push the bottom-right corner outwards.
        x1 = max(0, x - dw)
        y1 = max(0, y - dh)
        x2 = min(w0, x + w + dw)
        y2 = min(h0, y + h + dh)
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

    # 5. Display (height limited to 500 px)
    window_name = "Face Detection"
    max_h = 500
    if h0 > max_h:
        scale_show = max_h / h0
        show_img = cv2.resize(img, (max(1, int(w0 * scale_show)), max_h))
    else:
        show_img = img

    cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
    cv2.imshow(window_name, show_img)
    print("[INFO] press 'q' or ESC in the window to quit")
    while True:
        k = cv2.waitKey(200) & 0xFF
        if k == ord('q') or k == 27:  # 27 = ESC
            break
        # Also stop when the user closes the window with the mouse;
        # otherwise the loop could only be ended with Ctrl+C.
        if cv2.getWindowProperty(window_name, cv2.WND_PROP_VISIBLE) < 1:
            break
    cv2.destroyAllWindows()
# Script entry point: run the Haar-cascade demo on the sample image.
if __name__ == "__main__":
    detect_faces(r"img/friends.jpg")
保存代码。
目标人脸图片文件置于 img 文件夹;haarcascade_frontalface_default.xml 置于运行脚本的当前目录(代码按该相对路径加载),其余模型文件置于 models 文件夹。
效果
终端进入目标文件夹,执行 python fd_xml.py
弹窗显示人脸检测结果;
YuNet 模型
为了同时显示置信度,可使用轻量化的 YuNet 预训练 ONNX 模型,流程图与 Haar 类似,仅将检测模型改为 YuNet。
- 下载 YuNet 人脸检测预训练文件 face_detection_yunet_2023mar.onnx 并置于 models 文件夹,以便后续推理时调用;
详见:face_detection_yunet · opencv/opencv_zoo .
代码
终端执行 touch fd_onnx.py
指令新建文件,并添加如下代码
import cv2
import numpy as np
def detect_faces_yunet(image_path: str,
                       model_path: str = "models/face_detection_yunet_2023mar.onnx",
                       conf_threshold: float = 0.8) -> None:
    """
    Detect faces with the YuNet ONNX model and show boxes with their scores.

    The image is letterboxed onto a gray canvas whose sides are rounded up
    to multiples of 32, passed through ``cv2.FaceDetectorYN``, and every
    detection is mapped back to the original image.  The window stays open
    until 'q' or ESC is pressed, or until the window itself is closed.

    :param image_path: path of the image to analyse
    :param model_path: path of the YuNet ``.onnx`` file
    :param conf_threshold: minimum score for a detection to be kept
    :raises FileNotFoundError: if the image or the model file cannot be found
    """
    # Local import: this script's import header only pulls in cv2 and numpy.
    import os

    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(image_path)
    if not os.path.isfile(model_path):
        # Report the missing model by path instead of failing inside OpenCV.
        raise FileNotFoundError(model_path)
    h0, w0 = img.shape[:2]

    # 1. Round both sides up to a multiple of 32
    def align32(x):
        return (x + 31) // 32 * 32

    w_align, h_align = align32(w0), align32(h0)

    # 2. Letterbox: keep the aspect ratio, fill the border with gray (128)
    scale = min(w_align / w0, h_align / h0)
    new_w, new_h = int(w0 * scale), int(h0 * scale)
    pad_x, pad_y = (w_align - new_w) // 2, (h_align - new_h) // 2
    letter = np.full((h_align, w_align, 3), 128, dtype=np.uint8)
    letter[pad_y:pad_y + new_h, pad_x:pad_x + new_w] = cv2.resize(
        img, (new_w, new_h), interpolation=cv2.INTER_LINEAR
    )

    # 3. Create the detector and run it
    detector = cv2.FaceDetectorYN_create(
        model=model_path,
        config="",
        input_size=(w_align, h_align),
        score_threshold=conf_threshold,
        nms_threshold=0.4,
        top_k=5000
    )
    faces = detector.detect(letter)[1]
    if faces is None:
        faces = []

    # 4. Map every box back to original-image coordinates
    font = cv2.FONT_HERSHEY_SIMPLEX
    for face in faces:
        bx, by, bw, bh = (float(v) for v in face[:4])
        score = float(face[-1])
        # Undo the letterbox offset and scale in floating point and convert
        # to int once, so the box is not shifted by a double truncation.
        x1 = int(round((bx - pad_x) / scale))
        y1 = int(round((by - pad_y) / scale))
        x2 = int(round((bx + bw - pad_x) / scale))
        y2 = int(round((by + bh - pad_y) / scale))
        # Keep the box inside the image
        x1 = min(max(x1, 0), w0 - 1)
        y1 = min(max(y1, 0), h0 - 1)
        x2 = min(max(x2, 0), w0 - 1)
        y2 = min(max(y2, 0), h0 - 1)
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

        label = f"{score:.2f}"
        (text_w, text_h), _ = cv2.getTextSize(label, font, 0.5, 1)
        # Put the label above the box; when the face touches the top edge
        # there is no room, so put it just inside the box instead.
        if y1 - text_h - 4 >= 0:
            label_bottom = y1
        else:
            label_bottom = y1 + text_h + 4
        cv2.rectangle(img, (x1, label_bottom - text_h - 4),
                      (x1 + text_w, label_bottom), (0, 255, 0), -1)
        cv2.putText(img, label, (x1, label_bottom - 2), font,
                    0.5, (0, 0, 0), 1, cv2.LINE_AA)

    # 5. Display (height limited to 500 px)
    window_name = "YuNet"
    max_h = 500
    if h0 > max_h:
        show_img = cv2.resize(img, (max(1, int(w0 * max_h / h0)), max_h))
    else:
        show_img = img
    cv2.imshow(window_name, show_img)
    print("[INFO] press 'q' or ESC in the window to quit")
    while True:
        k = cv2.waitKey(200) & 0xFF
        if k == ord('q') or k == 27:  # 27 = ESC
            break
        # Also stop when the user closes the window with the mouse.
        if cv2.getWindowProperty(window_name, cv2.WND_PROP_VISIBLE) < 1:
            break
    cv2.destroyAllWindows()
# Script entry point: run the YuNet demo on the sample image.
if __name__ == "__main__":
    detect_faces_yunet("img/friends.jpg")
保存文件;
效果
动态检测
- 连接 USB 摄像头,捕捉画面帧并进行人脸检测,并将结果实时显示在窗口中。
- 使用 YuNet 模型,加入置信度并提升识别准确率
准备工作
- 连接 USB 摄像头(这里使用罗技 C270 标准 UVC 摄像头);
- 终端执行指令
sudo apt-get install v4l-utils
- 安装 v4l-utils 工具后,执行以下指令获取设备列表
v4l2-ctl --list-devices
- 根据给出的设备列表,找到 UVC 相机对应的设备号,如
video5
,并据此修改后文代码中的摄像头设备索引;
流程图
代码
终端执行指令 touch fd_CamUSB.py
创建文件并添加如下代码
import cv2
from pathlib import Path
import urllib.request
import sys
# ---------- 1. Model path & download ----------
MODEL_URL = (
    "https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/"
    "face_detection_yunet_2023mar.onnx"
)
MODEL_PATH = Path("models/face_detection_yunet_2023mar.onnx")
if not MODEL_PATH.exists():
    print("首次使用,正在下载 YuNet 权重...")
    # urlretrieve does not create missing directories, so make sure the
    # target folder exists first.
    MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)
    # Download under a temporary name and rename on success: an interrupted
    # transfer must not leave a truncated file that passes exists() next run.
    _tmp_path = MODEL_PATH.with_name(MODEL_PATH.name + ".part")
    try:
        urllib.request.urlretrieve(MODEL_URL, _tmp_path)
        _tmp_path.replace(MODEL_PATH)
    finally:
        if _tmp_path.exists():
            _tmp_path.unlink()
    print("下载完成.")
# ---------- 2. Camera initialisation ----------
def initialize_camera(camera_indices=(5, 6)):
    """
    Open the first USB camera that delivers a frame.

    Each index in ``camera_indices`` is opened through the V4L2 backend and
    probed with one ``read()``.  Capture objects that fail the probe are
    released before the next index is tried.

    :param camera_indices: ``/dev/video<N>`` indices to probe, in order
    :return: ``(capture, index)`` of the first working camera, or
             ``(None, None)`` when none of the indices works
    """
    print("=== USB摄像头初始化 ===")
    for camera_index in camera_indices:
        print(f"尝试打开 /dev/video{camera_index}...")
        cap = None
        try:
            cap = cv2.VideoCapture(camera_index, cv2.CAP_V4L2)
            if cap.isOpened():
                ret, frame = cap.read()
                if ret and frame is not None:
                    print(f"✓ 成功打开 USB摄像头 /dev/video{camera_index}")
                    return cap, camera_index
        except Exception as e:
            # Deliberately broad: any failure on this index just means
            # "try the next one".
            print(f" 错误: {e}")
        # Reached only when this index did not work: free the device handle.
        if cap is not None:
            cap.release()
    return None, None
cap, camera_index = initialize_camera()
if cap is None:
    print("错误: 无法打开任何摄像头")
    print("请检查:")
    print("1. USB摄像头是否已连接")
    print("2. 摄像头权限: sudo chmod 666 /dev/video5 /dev/video6")
    print("3. 用户组: sudo usermod -a -G video $USER")
    sys.exit(1)

# Everything below runs under try/finally so the camera and the window are
# released on every exit path (normal quit, read failure, exception).
try:
    # Read one frame to learn the resolution
    ret, frame = cap.read()
    if not ret:
        sys.exit("无法读取画面")
    h, w = frame.shape[:2]
    print(f"摄像头分辨率: {w}x{h}")

    # ---------- 3. Create the YuNet detector ----------
    detector = cv2.FaceDetectorYN_create(
        model=str(MODEL_PATH),
        config="",
        input_size=(w, h),
        score_threshold=0.7,
        nms_threshold=0.4,
        top_k=5000
    )

    # ---------- 4. Main loop ----------
    window_name = "YuNet USB Camera"
    print("按 q 退出")
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # The detector was created for (w, h); only update its input size
        # if the camera starts delivering frames of a different size.
        frame_h, frame_w = frame.shape[:2]
        if (frame_w, frame_h) != (w, h):
            w, h = frame_w, frame_h
            detector.setInputSize((w, h))

        # Detect
        faces = detector.detect(frame)[1]  # shape: (N, 15)
        if faces is None:
            faces = []

        for face in faces:
            x, y, w_box, h_box = map(int, face[:4])
            score = float(face[-1])
            # Draw the box
            cv2.rectangle(frame, (x, y), (x + w_box, y + h_box), (0, 255, 0), 2)
            # Write the confidence score
            label = f"{score:.2f}"
            cv2.putText(frame, label, (x, y - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # Show
        cv2.imshow(window_name, frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        # Also stop when the user closes the window with the mouse.
        if cv2.getWindowProperty(window_name, cv2.WND_PROP_VISIBLE) < 1:
            break
finally:
    # ---------- 5. Cleanup ----------
    cap.release()
    cv2.destroyAllWindows()
保存代码;
效果
- 执行
python -m venv venv
创建虚拟环境(若 OpenCV 通过 apt 安装,需追加 --system-site-packages 参数,否则虚拟环境内无法导入 cv2);
- 执行
source venv/bin/activate
激活虚拟环境;
- 执行
python fd_CamUSB.py
运行程序检测 USB 摄像头设备并打开弹窗,显示实时画面。
- 将摄像头对准目标检测场景,即可显示识别结果
动态效果
识别准确率较高,但使用 CPU 推理会占用较多资源。
NPU 推理
- 下载预训练模型 ai_model_hub_24_Q4 · 模型库 .
主机终端执行
git clone https://www.modelscope.cn/cix/ai_model_hub_24_Q4.git
拉取 AI Model Hub
预训练模型文件;
目录结构
├── datasets
├── models
│ ├── Audio
│ │ └── Speech_Recognotion
│ ├── ComputeVision
│ │ ├── Face_Detection
│ │ ├── Face_Recognition
│ │ ├── Image_Classification
│ │ ├── Object_Detection
│ │ ├── Pose_Estimation
│ │ ├── Semantic_Segmentation
│ │ └── Super_Resolution
│ └── Generative_AI
│ ├── LLM
│ └── Text_Image_Search
└── utils
├── evaluate
└── label
运行原理
- 将人类可读的输入预处理为模型输入
- 运行模型推理
- 将模型输出后处理为人类可读的格式
流程图
代码
# ---------------------------------------------------------------------
# Copyright 2022-2025 Cix Technology Group Co., Ltd. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# ---------------------------------------------------------------------
"""
This is the script showing how to run facenet model inference on cix npu.
"""
import numpy as np
import cv2
from PIL import Image
from typing import Tuple, Union, List
from tqdm import tqdm
import sys
import argparse
import os
# Define the absolute path to the utils package by going up four directory levels from the current file location
_abs_path = os.path.join(os.path.dirname(__file__), "../../../../")
# Append the utils package path to the system path, making it accessible for imports
sys.path.append(_abs_path)
from utils.tools import get_file_list
from utils.NOE_Engine import EngineInfer
def get_args(argv=None):
    """
    Parse the command-line options of the inference script.

    Parameters
    ----------
    argv : list of str, optional
        Argument list to parse instead of ``sys.argv[1:]``.  Leave it as
        ``None`` for normal command-line use.

    Returns
    -------
    argparse.Namespace
        Namespace with ``image_path``, ``model_path`` and ``output_dir``.
    """
    parser = argparse.ArgumentParser()
    # Path to a single image or to a directory containing images
    parser.add_argument(
        "--image_path",
        default="./test_data/",
        # Adjacent literals instead of a backslash continuation inside the
        # string, which fused the two sentences and would embed the source
        # indentation in the help text.
        help="path to the image file path or dir path. "
             "eg. image_path=./test_data/",
    )
    # Path to the cix binary model file
    parser.add_argument(
        "--model_path",
        default="facenet.cix",
        help="path to the model file",
    )
    parser.add_argument(
        "--output_dir", default="./output", help="path to the result output"
    )
    return parser.parse_args(argv)
def smart_resize(image: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
    """
    Resize an image to exactly the requested shape.

    Images with one or three channels are resized in a single call, using
    area interpolation when the target is smaller overall and Lanczos
    interpolation otherwise.  Any other channel count is handled by resizing
    each channel separately and stacking the results along the last axis.

    Parameters
    ----------
    image : np.ndarray
        The input image, either ``(H, W)`` or ``(H, W, C)``.
    shape : Tuple[int, int]
        The target shape (height, width).

    Returns
    -------
    np.ndarray
        The resized image
    """
    target_h, target_w = shape
    if image.ndim == 2:
        src_h, src_w = image.shape
        channels = 1
    else:
        src_h, src_w, channels = image.shape

    # Unusual channel counts: recurse on every plane, then re-assemble.
    if channels not in (1, 3):
        planes = [smart_resize(image[:, :, idx], shape) for idx in range(channels)]
        return np.stack(planes, axis=2)

    # Ratio of summed side lengths decides between shrinking and enlarging.
    ratio = float(target_h + target_w) / float(src_h + src_w)
    method = cv2.INTER_AREA if ratio < 1 else cv2.INTER_LANCZOS4
    return cv2.resize(image, (int(target_w), int(target_h)), interpolation=method)
class FaceLandmarkDetector:
    """
    OpenPose-style face landmark detector executed through ``EngineInfer``.

    Parameters
    ----------
    face_model_path : str
        The path to the model file handed to ``EngineInfer``.
    """

    # Side length of the square network input.
    INPUT_SIZE = 368
    # (parts, height, width) layout the raw heat-map output is reshaped to.
    HEATMAP_SHAPE = (71, 46, 46)
    # Heat-map responses at or below this value are not reported as keypoints.
    PEAK_THRESHOLD = 0.02

    def __init__(self, face_model_path) -> None:
        """
        Load the model.

        Parameters
        ----------
        face_model_path : str
            The path to the model file handed to ``EngineInfer``.
        """
        self.model = EngineInfer(face_model_path)
        # Not read inside this class; kept so existing users of the
        # attribute keep working.
        self.input_name = 'input'

    def _inference(self, face_img: np.ndarray) -> np.ndarray:
        """
        Run the model on one face crop.

        Parameters
        ----------
        face_img : np.ndarray
            The input image, H x W x C.

        Returns
        -------
        np.ndarray
            The detected keypoints as ``(x, y)`` rows in ``face_img``
            coordinates; ``(-1, -1)`` marks a part without a peak.
        """
        H, W, _ = face_img.shape

        # Pre-processing: resize to the network input, scale to roughly
        # [-0.5, 0.5), channel-first, add the batch dimension.
        size = self.INPUT_SIZE
        resized_img = cv2.resize(face_img, (size, size), interpolation=cv2.INTER_LINEAR)
        x_data = resized_img.astype(np.float32) / 256.0 - 0.5
        x_data = np.transpose(x_data, (2, 0, 1))
        x_data = np.expand_dims(x_data, axis=0)

        # Run inference; the last output is taken as the heat maps.
        outputs = self.model.forward(x_data)
        heatmaps_original = outputs[-1].reshape(self.HEATMAP_SHAPE)

        # Resize every heat map back to the size of the input crop so that
        # peak positions are directly in crop coordinates.
        num_parts = heatmaps_original.shape[0]
        heatmaps = np.zeros((num_parts, H, W), dtype=np.float32)
        for i in range(num_parts):
            heatmaps[i] = cv2.resize(
                heatmaps_original[i], (W, H), interpolation=cv2.INTER_LINEAR
            )

        return self.compute_peaks_from_heatmaps(heatmaps)

    @staticmethod
    def _as_image_list(face_img) -> list:
        """Normalise every supported input form into a list of arrays."""
        if isinstance(face_img, list):
            if not face_img:
                # An empty batch is valid input, not an error.
                return []
            first = face_img[0]
            if isinstance(first, np.ndarray):
                return face_img
            if isinstance(first, Image.Image):
                return [np.array(img) for img in face_img]
            raise ValueError("List elements must be PIL.Image or np.ndarray")
        if isinstance(face_img, np.ndarray):
            if face_img.ndim == 4:
                return list(face_img)
            if face_img.ndim == 3:
                return [face_img]
            raise ValueError("Unsupported ndarray shape.")
        if isinstance(face_img, Image.Image):
            return [np.array(face_img)]
        raise ValueError("Unsupported input type.")

    def __call__(
        self,
        face_img: "Union[np.ndarray, List[np.ndarray], Image.Image, List[Image.Image]]",
    ) -> List[np.ndarray]:
        """
        Run the detector on one image or on a batch of images.

        Parameters
        ----------
        face_img : Union[np.ndarray, List[np.ndarray], Image.Image, List[Image.Image]]
            A single image (3-D array or PIL image), a 4-D array treated as
            a batch, or a list of images.

        Returns
        -------
        List[np.ndarray]
            One keypoint array per input image; empty for an empty batch.

        Raises
        ------
        ValueError
            If the input type or array rank is not supported.
        """
        image_list = self._as_image_list(face_img)
        if not image_list:
            return []
        return [self._inference(image) for image in tqdm(image_list)]

    def compute_peaks_from_heatmaps(self, heatmaps: np.ndarray) -> np.ndarray:
        """
        Locate the strongest response of every heat map.

        Parameters
        ----------
        heatmaps : np.ndarray
            The heat maps, shaped (parts, H, W).

        Returns
        -------
        np.ndarray
            One ``[x, y]`` row per part; ``[-1, -1]`` when no value of the
            map exceeds ``PEAK_THRESHOLD``.
        """
        all_peaks = []
        for heatmap in heatmaps:
            ys, xs = np.nonzero(heatmap > self.PEAK_THRESHOLD)
            if ys.size == 0:
                all_peaks.append([-1, -1])
                continue
            # Strongest response among the above-threshold pixels.
            best = np.argmax(heatmap[ys, xs])
            all_peaks.append([xs[best], ys[best]])
        return np.array(all_peaks)

    def release(self):
        """Release the resources held by the underlying engine."""
        self.model.clean()
# Script entry point: find faces with a Haar cascade, run the landmark model
# on every face crop and save the annotated images.
if __name__ == "__main__":
    args = get_args()
    # Get a list of images from the provided path
    images_list = get_file_list(args.image_path)
    print(images_list)

    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
    )
    if face_cascade.empty():
        raise RuntimeError("Error can't load haarcascade_frontalface_default.xml")

    # Create the output folder once instead of once per image.
    output_dir = args.output_dir
    os.makedirs(output_dir, exist_ok=True)

    detector = FaceLandmarkDetector(args.model_path)
    try:
        for image_path in images_list:
            image = cv2.imread(image_path)
            print(image_path)
            if image is None:
                raise FileNotFoundError(f"Error can't open: {image_path}")

            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)
            if len(faces) == 0:
                print("no detect faces")
                # Skip only this image: exiting here would drop the rest of
                # the batch and leave the detector unreleased.
                continue
            print(f"detect {len(faces)} faces")

            for (x, y, w, h) in faces:
                # Grow the box by 20 % of its width on every side, clipped
                # to the image.
                margin = int(0.2 * w)
                x1 = max(x - margin, 0)
                y1 = max(y - margin, 0)
                x2 = min(x + w + margin, image.shape[1])
                y2 = min(y + h + margin, image.shape[0])
                # face_img is a slice of image, so the circles drawn on it
                # below end up in the saved image.
                face_img = image[y1:y2, x1:x2]
                face_rgb = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
                keypoints = detector(face_rgb)[0]
                for (px, py) in keypoints:
                    if px != -1 and py != -1:
                        cv2.circle(face_img, (int(px), int(py)), 2, (0, 255, 0), -1)

            out_image_path = os.path.join(
                output_dir, "npu_" + os.path.basename(image_path)
            )
            cv2.imwrite(out_image_path, image)
    finally:
        # Release the detector on every exit path. The script never opens a
        # window, so no waitKey()/destroyAllWindows() call is needed.
        detector.release()
- 将示例代码上传至 O6 板端;
- 使用 NPU 端到端运行:
python3 inference_npu.py
结果输出到 output 文件夹
总结
本文介绍了瑞莎星睿 O6 (Radxa Orion O6) 开发板结合 Haar 级联和 YuNet 模型实现人脸检测的项目设计,包括板端推理和动态检测,并通过 cix 格式的预训练 FaceNet 模型文件实现板端 NPU 推理,为相关产品的开发设计和快速应用提供了参考。