This project demonstrates how to interact with a 3D car model using hand gestures in Augmented Reality using Python, MediaPipe and PyRender.
Features
- Real-time hand gesture tracking
- 3D AR model viewer
- Wireframe inspection mode
- Engine inspection mode
- Two-finger explode animation
- Finger-based model rotation
Required Libraries
pip install opencv-python mediapipe numpy pyrender trimesh
Copy the command above and run it in your terminal before running the project.
How the System Works
The webcam captures hand gestures. MediaPipe detects finger landmarks, and based on the finger positions the system switches between different 3D views: the car view, the engine view, or the wireframe inspection.
Python Code
import cv2
import mediapipe as mp
import numpy as np
import trimesh
import pyrender
import math
# =============================
# MODEL PATHS
# =============================
# NOTE(review): absolute, machine-specific paths — update these before running
# on another machine.
CAR_PATH = r"C:/python/Augmented reality/GLB FILES/car5.glb"
ENGINE_PATH = r"C:/python/Augmented reality/GLB FILES/V8Engine/v8_engine.glb"
WIREFRAME_PATH = r"C:/python/Augmented reality/GLB FILES/wireframe1.glb"
# =============================
# MODEL SCALES (SEPARATE)
# =============================
# Uniform scale factor applied to each model's geometry at load time.
CAR_MODEL_SCALE = 2.0
ENGINE_MODEL_SCALE = 0.5
WIREFRAME_MODEL_SCALE = 2.0
# =============================
# DISPLAY SETTINGS
# =============================
WIDTH = 1280          # requested webcam frame width (pixels)
HEIGHT = 720          # requested webcam frame height (pixels)
RIGHT_PANEL = 220     # width of the on-screen mode-selection panel (pixels)
CAMERA_DISTANCE = 8   # virtual render camera offset along +Z
EXPLODE_MULT = 2      # multiplier applied to the per-part explode offset
# =============================
# LOAD MODEL
# =============================
def load_model(path, scale, wire=False):
    """Load a GLB model into a fresh pyrender scene.

    Parameters
    ----------
    path : str
        Path to a .glb file (loaded via trimesh).
    scale : float
        Uniform scale factor applied to every geometry.
    wire : bool
        Render the meshes in wireframe mode.

    Returns
    -------
    tuple
        ``(scene, nodes, directions)``: the pyrender.Scene, the list of
        scene nodes (one per part), and a matching list of unit vectors
        used as per-part "explode" directions.
    """
    mesh = trimesh.load(path)
    scene = pyrender.Scene()
    nodes = []
    directions = []
    if isinstance(mesh, trimesh.Scene):
        # Multi-part model: each geometry becomes its own node so the
        # parts can be exploded independently.
        for g in mesh.geometry.values():
            g = g.copy()  # don't mutate trimesh's cached geometry
            g.apply_scale(scale)
            try:
                m = pyrender.Mesh.from_trimesh(g, smooth=False, wireframe=wire)
            except Exception:
                # Some materials fail conversion; retry with plain vertex
                # colors.  (Was a bare "except:", which also swallowed
                # KeyboardInterrupt/SystemExit.)
                g.visual = trimesh.visual.ColorVisuals(g)
                m = pyrender.Mesh.from_trimesh(g, smooth=False, wireframe=wire)
            nodes.append(scene.add(m))
            # Random unit vector: the direction this part travels during
            # the explode animation.
            d = np.random.uniform(-1, 1, 3)
            d = d / np.linalg.norm(d)
            directions.append(d)
    else:
        # Single-mesh model: one node, exploded straight up.
        mesh.apply_scale(scale)
        m = pyrender.Mesh.from_trimesh(mesh, smooth=False, wireframe=wire)
        nodes.append(scene.add(m))
        directions.append(np.array([0, 1, 0]))
    return scene, nodes, directions
# =============================
# LOAD ALL MODELS
# =============================
engine_scene, engine_nodes, engine_dir = load_model(ENGINE_PATH, ENGINE_MODEL_SCALE, wire=False)
wire_scene, wire_nodes, wire_dir = load_model(WIREFRAME_PATH, WIREFRAME_MODEL_SCALE, wire=True)
car_scene, car_nodes, car_dir = load_model(CAR_PATH, CAR_MODEL_SCALE, wire=False)
# Index 0/1/2 corresponds to mode 1/2/3 (engine, wireframe, car).
scenes = [engine_scene, wire_scene, car_scene]
nodes_list = [engine_nodes, wire_nodes, car_nodes]
dir_list = [engine_dir, wire_dir, car_dir]
# =============================
# CAMERA + LIGHT
# =============================
camera = pyrender.PerspectiveCamera(yfov=np.pi / 3)
cam_pose = np.eye(4)
cam_pose[2, 3] = CAMERA_DISTANCE  # pull the camera back along +Z
for sc in scenes:
    sc.add(camera, pose=cam_pose)
    # One directional light per scene, co-located with the camera.
    light = pyrender.DirectionalLight(color=np.ones(3), intensity=3)
    sc.add(light, pose=cam_pose)
# Offscreen renderer covers the frame minus the right-hand control panel.
renderer = pyrender.OffscreenRenderer(WIDTH - RIGHT_PANEL, HEIGHT)
# =============================
# HAND TRACKING
# =============================
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=2)
# =============================
# CAMERA
# =============================
cap = cv2.VideoCapture(0)
# Named properties instead of the magic indices 3 and 4.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, WIDTH)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, HEIGHT)
# =============================
# VARIABLES
# =============================
mode = 0            # 0 = nothing selected yet, 1..3 = scenes[mode - 1]
explode = 0         # current explode offset (scaled by EXPLODE_MULT)
rot_x = 0           # model rotation about the X axis (radians)
rot_y = 0           # model rotation about the Y axis (radians)
prev_center = None  # last index-finger position, for rotation deltas
prev_dist = None    # last two-finger distance, for explode deltas
# =============================
# SCI-FI TRACKER
# =============================
def tracker(frame, x, y):
    """Draw a sci-fi targeting reticle on *frame*, centred at (x, y)."""
    cyan = (255, 255, 0)
    # Two concentric rings around a solid white centre dot.
    cv2.circle(frame, (x, y), 22, cyan, 1)
    cv2.circle(frame, (x, y), 12, cyan, 2)
    cv2.circle(frame, (x, y), 3, (255, 255, 255), -1)
    # Four cross-hair ticks (left, right, up, down), each running from
    # 28 px out to 10 px from the centre.
    for dx, dy in ((-1, 0), (1, 0), (0, -1), (0, 1)):
        outer = (x + dx * 28, y + dy * 28)
        inner = (x + dx * 10, y + dy * 10)
        cv2.line(frame, outer, inner, cyan, 1)
# =============================
# FINGER EXTENSION CHECK
# =============================
def finger_extended(lm, tip, pip, mcp):
    """Return True when a finger is straightened.

    Image-space y grows downward, so an extended finger has its tip
    landmark above (smaller y than) the PIP joint, which in turn sits
    above the MCP knuckle.
    """
    tip_y = lm[tip].y
    pip_y = lm[pip].y
    mcp_y = lm[mcp].y
    return tip_y < pip_y and pip_y < mcp_y
# =============================
# WINDOW
# =============================
# Create a resizable window, then force it into fullscreen mode.
cv2.namedWindow("AR Garage",cv2.WINDOW_NORMAL)
cv2.setWindowProperty("AR Garage",cv2.WND_PROP_FULLSCREEN,cv2.WINDOW_FULLSCREEN)
# =============================
# MAIN LOOP
# =============================
while True:
    ret, frame = cap.read()
    if not ret:
        # Camera gone / no frame delivered — exit cleanly instead of
        # crashing on cv2.flip(None, ...).
        break
    frame = cv2.flip(frame, 1)  # mirror so gestures feel natural
    h, w, _ = frame.shape
    sec_h = h // 3  # the right panel is split into three mode sections

    # --- Draw the translucent three-section control panel on the right.
    overlay = frame.copy()
    blue = (255, 120, 0)
    cv2.rectangle(overlay, (w - RIGHT_PANEL, 0), (w, sec_h), blue, -1)
    cv2.rectangle(overlay, (w - RIGHT_PANEL, sec_h), (w, 2 * sec_h), blue, -1)
    cv2.rectangle(overlay, (w - RIGHT_PANEL, 2 * sec_h), (w, h), blue, -1)
    frame = cv2.addWeighted(overlay, 0.35, frame, 0.65, 0)
    cv2.rectangle(frame, (w - RIGHT_PANEL, 0), (w, sec_h), (255, 0, 0), 3)
    cv2.rectangle(frame, (w - RIGHT_PANEL, sec_h), (w, 2 * sec_h), (255, 0, 0), 3)
    cv2.rectangle(frame, (w - RIGHT_PANEL, 2 * sec_h), (w, h), (255, 0, 0), 3)

    # --- Hand landmark detection (MediaPipe wants RGB).
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    res = hands.process(rgb)
    index_pts = []
    mid_pts = []
    if res.multi_hand_landmarks:
        for hand in res.multi_hand_landmarks:
            lm = hand.landmark
            if finger_extended(lm, 8, 6, 5):  # index finger landmarks
                ix = int(lm[8].x * w)
                iy = int(lm[8].y * h)
                index_pts.append((ix, iy))
                tracker(frame, ix, iy)
            if finger_extended(lm, 12, 10, 9):  # middle finger landmarks
                mx = int(lm[12].x * w)
                my = int(lm[12].y * h)
                mid_pts.append((mx, my))
                tracker(frame, mx, my)

    # --- Mode selection and rotation.
    # NOTE(review): the original source was garbled here ("if y0:").
    # Reconstructed as the obvious three-way panel-section test (matching
    # the three drawn sections) plus drag-to-rotate over the 3D view,
    # which the otherwise-unused prev_center/rot_x/rot_y state implies.
    if index_pts:
        x, y = index_pts[0]
        if x > w - RIGHT_PANEL:
            # Finger inside the panel: pick a model by section.
            if y < sec_h:
                mode = 1  # engine view
            elif y < 2 * sec_h:
                mode = 2  # wireframe view
            else:
                mode = 3  # car view
            prev_center = None
        else:
            # Finger over the 3D view: drag to rotate the model.
            if prev_center is not None:
                rot_y += (x - prev_center[0]) * 0.01
                rot_x += (y - prev_center[1]) * 0.01
            prev_center = (x, y)
    else:
        prev_center = None

    # --- Two middle fingers: spread/pinch to explode/collapse the model.
    # NOTE(review): reconstructed from the otherwise-unused mid_pts /
    # prev_dist / explode state and the imported math module — confirm
    # against the original gesture design.
    if len(mid_pts) == 2:
        d = math.hypot(mid_pts[0][0] - mid_pts[1][0],
                       mid_pts[0][1] - mid_pts[1][1])
        if prev_dist is not None:
            explode = max(0.0, explode + (d - prev_dist) * 0.01)
        prev_dist = d
    else:
        prev_dist = None

    # --- Render the selected scene and composite it over the camera frame.
    if mode > 0:
        scene = scenes[mode - 1]
        nodes = nodes_list[mode - 1]
        dirs = dir_list[mode - 1]
        Rx = trimesh.transformations.rotation_matrix(rot_x, [1, 0, 0])
        Ry = trimesh.transformations.rotation_matrix(rot_y, [0, 1, 0])
        R = np.dot(Ry, Rx)
        for i, node in enumerate(nodes):
            # Translate each part along its explode direction, then rotate
            # the whole assembly.
            T = np.eye(4)
            T[:3, 3] = dirs[i] * explode * EXPLODE_MULT
            scene.set_pose(node, np.dot(R, T))
        color, depth = renderer.render(scene)
        mask = depth > 0  # only overwrite pixels the model actually covers
        # NOTE(review): assumes the webcam really delivered WIDTHxHEIGHT so
        # the render buffer matches frame[:, :w - RIGHT_PANEL] — confirm.
        frame[:, 0:w - RIGHT_PANEL][mask] = color[mask]

    cv2.imshow("AR Garage", frame)
    if cv2.waitKey(1) == 27:  # Esc quits
        break

# Release hardware and GL resources on exit.
cap.release()
renderer.delete()
cv2.destroyAllWindows()
Code Explanation
- OpenCV captures webcam frames.
- MediaPipe detects hand landmarks.
- PyRender renders the 3D models.
- Finger gestures control model interaction.
How to Run
- Install required libraries
- Download the 3D models
- Update model paths
- Run the Python script
Comments