Augmented Reality
Augmented Reality (AR) is the computer-aided extension of reality as we humans perceive it.
AR can be applied to all sensory modalities; in the following we only consider visually perceived AR.
AR typically has three features:
Real-time interaction
The combination of the real world with virtual content
The spatial registration of 3D objects in the real and the virtual world
AR Proof of Concept
Inspired by augmented-reality
# Required for AR
import math
import os
from time import sleep

import cv2
import numpy as np
from IPython.display import display
OBJ file: the geometric description of the animation
What is an OBJ file? Here is an example:
# List of geometric vertices, with (x, y, z [,w]) coordinates, w is optional and defaults to 1.0.
v 0.123 0.234 0.345 1.0
v ...
...
# List of texture coordinates, in (u, [v, w]) coordinates, these will vary between 0 and 1. v, w are optional and default to 0.
vt 0.500 1 [0]
vt ...
...
# List of vertex normals in (x,y,z) form; normals might not be unit vectors.
vn 0.707 0.000 0.707
vn ...
...
# Parameter space vertices in (u [,v] [,w]) form; free form geometry statement (see below)
vp 0.310000 3.210000 2.100000
vp ...
...
# Polygonal face element (see below)
f 1 2 3
f 3/1 4/2 5/3
f 6/4/1 3/5/3 7/6/5
f 7//1 8//2 9//3
f ...
...
# Line element (see below)
l 5 8 1 2 4 9
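The implementation below assumes an OBJ class that parses files of exactly this kind; that class is not shown in this section. The following is a minimal sketch of such a loader, covering only the v, vt, vn, and f statements used here. Note that the colored rendering path in render additionally expects a loader variant that appends a hex color string to each face tuple.

class OBJ:
    """Minimal Wavefront OBJ loader (a sketch; handles only v/vn/vt/f)."""

    def __init__(self, filename, swapyz=False):
        self.vertices = []
        self.normals = []
        self.texcoords = []
        self.faces = []
        with open(filename) as f:
            for line in f:
                if line.startswith('#'):
                    continue  # comment line
                values = line.split()
                if not values:
                    continue  # empty line
                if values[0] == 'v':
                    v = list(map(float, values[1:4]))
                    if swapyz:
                        v = [v[0], v[2], v[1]]  # swap y/z, e.g. for Blender exports
                    self.vertices.append(v)
                elif values[0] == 'vn':
                    n = list(map(float, values[1:4]))
                    if swapyz:
                        n = [n[0], n[2], n[1]]
                    self.normals.append(n)
                elif values[0] == 'vt':
                    self.texcoords.append(list(map(float, values[1:3])))
                elif values[0] == 'f':
                    # a face entry is 'v', 'v/vt', 'v/vt/vn' or 'v//vn'
                    face, texcoords, norms = [], [], []
                    for v in values[1:]:
                        w = v.split('/')
                        face.append(int(w[0]))
                        texcoords.append(int(w[1]) if len(w) >= 2 and w[1] else 0)
                        norms.append(int(w[2]) if len(w) >= 3 and w[2] else 0)
                    self.faces.append((face, norms, texcoords))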
Implementation
DEFAULT_COLOR = (137, 27, 211)  # fallback fill color in BGR (arbitrary choice)

def render(img, obj, projection, model, scale=1, color=False):
    """
    Render a loaded obj model into the current video frame
    """
    vertices = obj.vertices
    scale_matrix = np.eye(3) * scale
    h, w = model.shape
    for face in obj.faces:
        face_vertices = face[0]
        points = np.array([vertices[vertex - 1] for vertex in face_vertices])
        points = np.dot(points, scale_matrix)
        # render model in the middle of the reference surface. To do so,
        # model points must be displaced
        points = np.array([[p[0] + w / 2, p[1] + h / 2, p[2]] for p in points])
        # project the 3D model points into the frame with the 3x4 projection matrix
        dst = cv2.perspectiveTransform(points.reshape(-1, 1, 3), projection)
        imgpts = np.int32(dst)
        if color is False:
            cv2.fillConvexPoly(img, imgpts, DEFAULT_COLOR)
        else:
            face_color = hex_to_rgb(face[-1])
            face_color = face_color[::-1]  # RGB -> BGR for OpenCV
            cv2.fillConvexPoly(img, imgpts, face_color)
    return img
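The colored branch of render relies on a hex_to_rgb helper that is not defined in this section. A minimal sketch, assuming face colors arrive as hex strings such as '#ff8800':

def hex_to_rgb(hex_color):
    """Convert a hex color string such as '#ff8800' into an (r, g, b) tuple."""
    hex_color = hex_color.lstrip('#')
    step = len(hex_color) // 3
    return tuple(int(hex_color[i:i + step], 16) for i in range(0, len(hex_color), step))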
def projection_matrix(camera_parameters, homography):
    """
    From the camera calibration matrix and the estimated homography
    compute the 3D projection matrix. For points on the reference plane,
    H ~ K [r1 r2 t], so K^-1 H yields the first two rotation columns and
    the translation up to scale; r3 = r1 x r2 completes the rotation.
    """
    # Compute rotation along the x and y axis as well as the translation
    homography = homography * (-1)
    rot_and_transl = np.dot(np.linalg.inv(camera_parameters), homography)
    col_1 = rot_and_transl[:, 0]
    col_2 = rot_and_transl[:, 1]
    col_3 = rot_and_transl[:, 2]
    # normalise vectors
    l = math.sqrt(np.linalg.norm(col_1, 2) * np.linalg.norm(col_2, 2))
    rot_1 = col_1 / l
    rot_2 = col_2 / l
    translation = col_3 / l
    # compute the orthonormal basis closest to (rot_1, rot_2)
    c = rot_1 + rot_2
    p = np.cross(rot_1, rot_2)
    d = np.cross(c, p)
    rot_1 = (c / np.linalg.norm(c, 2) + d / np.linalg.norm(d, 2)) / math.sqrt(2)
    rot_2 = (c / np.linalg.norm(c, 2) - d / np.linalg.norm(d, 2)) / math.sqrt(2)
    rot_3 = np.cross(rot_1, rot_2)
    # finally, compute the 3D projection matrix from the model to the current frame
    projection = np.stack((rot_1, rot_2, rot_3, translation)).T
    return np.dot(camera_parameters, projection)
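main() below additionally relies on a number of notebook controls (stopButton, recButton, matchButton, homog_slider, match_slider, scale_slider) and a show_frame helper, all defined elsewhere in the notebook. As a rough, hypothetical sketch of what they could look like (names and value ranges are guessed from how they are used in the loop):

import ipywidgets as widgets
from IPython.display import Image

# hypothetical controls; the real ones are defined elsewhere in the notebook
stopButton = widgets.ToggleButton(description='Stop')
recButton = widgets.ToggleButton(description='Rectangle')
matchButton = widgets.ToggleButton(description='Matches')
homog_slider = widgets.FloatSlider(value=5.0, min=1.0, max=10.0, description='RANSAC')
match_slider = widgets.IntSlider(value=10, min=4, max=100, description='Min matches')
scale_slider = widgets.FloatSlider(value=1.0, min=0.1, max=10.0, description='Scale')

def show_frame(frame, display_handle):
    # encode the BGR frame as JPEG and push it to the notebook output
    _, jpg = cv2.imencode('.jpeg', frame)
    display_handle.update(Image(data=jpg.tobytes()))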
def main():
    # approximate intrinsics for a 640x480 webcam: focal length ~400 px,
    # principal point at the image centre
    camera_parameters = np.array([[400, 0, 320], [0, 400, 240], [0, 0, 1]])
    # create ORB keypoint detector
    orb = cv2.ORB_create()
    # create BruteForceMatcher object based on hamming distance
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    # load the reference surface that will be searched in the video stream
    dir_name = path  # 'path' (resource directory) is assumed to be defined elsewhere in the notebook
    model = cv2.imread(os.path.join(dir_name, 'reference/reference.jpg'), 0)
    # compute model keypoints and descriptors
    kp_model, des_model = orb.detectAndCompute(model, None)
    # load 3D model from OBJ file
    obj = OBJ(os.path.join(dir_name, 'models/fox.obj'), swapyz=True)
    # init video capture
    cap = cv2.VideoCapture(0)
    display_handle = display(None, display_id=True)
    while True:
        if stopButton.value:
            cap.release()
            display_handle.update(None)
            return
        sleep(0.05)
        ret, frame = cap.read()
        # frame = cv2.flip(frame, 1)
        try:
            # find the keypoints and descriptors of the frame
            kp_frame, des_frame = orb.detectAndCompute(frame, None)
            # match frame descriptors with model descriptors
            matches = bf.match(des_model, des_frame)
            # sort them in the order of their distance
            # the lower the distance, the better the match
            matches = sorted(matches, key=lambda x: x.distance)
            # differentiate between source points and destination points
            src_pts = np.float32([kp_model[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
            dst_pts = np.float32([kp_frame[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)
            # compute homography
            homography, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, homog_slider.value)
            if recButton.value:
                # draw a rectangle that marks the found reference surface in the frame
                h, w = model.shape
                pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]).reshape(-1, 1, 2)
                # project corners into frame
                dst = cv2.perspectiveTransform(pts, homography)
                # connect them with lines
                frame = cv2.polylines(frame, [np.int32(dst)], True, 255, 3, cv2.LINE_AA)
            # if enough matches and a valid homography were found,
            # render the model on the reference plane
            if match_slider.value <= len(matches):
                if homography is not None:
                    try:
                        projection = projection_matrix(camera_parameters, homography)
                        frame = render(frame, obj, projection, model, scale_slider.value, False)
                    except Exception:
                        pass
            if matchButton.value:
                frame = cv2.drawMatches(model, kp_model, frame, kp_frame, matches, None, flags=2)
        except Exception:
            pass
        show_frame(frame, display_handle)

main()
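Taken together, the loop matches the ORB descriptors of the reference image against every camera frame, estimates a homography with RANSAC (the reprojection threshold comes from homog_slider), and, once enough good matches are found, decomposes the homography into a projection matrix and draws the fox model onto the detected reference surface. Since the intrinsic parameters above are only rough guesses, the registration is approximate; for precise results the camera should be calibrated first, e.g. with cv2.calibrateCamera.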