
Let's use the synchronous detect() function to detect a person's pose in the camera feed.

 

import numpy as np
import cv2
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
 
def draw_landmarks_on_image(rgb_image, detection_result, bg_black):
  pose_landmarks_list = detection_result.pose_landmarks
  # Black Background
  if bg_black:
      annotated_image = np.zeros_like(rgb_image)
  else:
      annotated_image = np.copy(rgb_image)
  
  # Loop through the detected poses to visualize.
  for idx in range(len(pose_landmarks_list)):
    pose_landmarks = pose_landmarks_list[idx]
 
    # Draw the pose landmarks.
    pose_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    pose_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in pose_landmarks
    ])
    solutions.drawing_utils.draw_landmarks(
      annotated_image,
      pose_landmarks_proto,
      solutions.pose.POSE_CONNECTIONS,
      solutions.drawing_styles.get_default_pose_landmarks_style())
  return annotated_image
 
# Create a PoseLandmarker object.
base_options = python.BaseOptions(model_asset_path='pose_landmarker_full.task')
options = vision.PoseLandmarkerOptions(base_options=base_options, output_segmentation_masks=True)
detector = vision.PoseLandmarker.create_from_options(options)
 
cap = cv2.VideoCapture(0)
 
while True:
    # Load the input frame.
    ret, cv_frame = cap.read()
    if not ret:
        break
    frame = mp.Image(image_format = mp.ImageFormat.SRGB, data = cv2.cvtColor(cv_frame, cv2.COLOR_BGR2RGB))
 
    # Detect pose landmarks from the input image.
    detection_result = detector.detect(frame)
    
    # Process the detection result. In this case, visualize it.
    annotated_frame = draw_landmarks_on_image(frame.numpy_view(), detection_result, True)
    cv2.imshow('sean', cv2.cvtColor(annotated_frame, cv2.COLOR_RGB2BGR))
 
    key = cv2.waitKey(25)
    if key == 27:  # ESC
        break
 
if cap.isOpened():
    cap.release()
cv2.destroyAllWindows()
 

 

This time, let's detect the pose using the asynchronous detect_async().

 

import time
import numpy as np
import cv2
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
 
landmark_result = None
 
# The user-defined result callback for processing live stream data.
# The result callback should only be specified when the running mode is set to the live stream mode.
# The result_callback provides:
# The pose landmarker detection results.
# The input image that the pose landmarker runs on.
# The input timestamp in milliseconds.
def print_result(result: vision.PoseLandmarkerResult, output_image: mp.Image, timestamp_ms: int):
    global landmark_result
    landmark_result = result
 
    #print(output_image.numpy_view())
    # output_image can be accessed here, but displaying it with cv2.imshow() from inside
    # this callback does not seem to work; I tried several approaches and none worked properly.
        
    # Structure of PoseLandmakerResult
    # mp.tasks.vision.PoseLandmarkerResult(
    # pose_landmarks: List[List[landmark_module.NormalizedLandmark]],
    # pose_world_landmarks: List[List[landmark_module.Landmark]],
    # segmentation_masks: Optional[List[image_module.Image]] = None
    # )
    
    #print('pose landmarker result: {}'.format(result))
    #print("pose landmark: ", result.pose_landmarks[0][0].visibility)
    #print("pose world landmark: ", result.pose_world_landmarks[0][0].visibility)
 
    # pose_landmarks_list = result.pose_landmarks    
    # for idx in range(len(pose_landmarks_list)):
    #     pose_landmarks = pose_landmarks_list[idx]        
    #     for landmark in pose_landmarks:
    #         print("x: %.2f, y: %.2f, z: %.2f visibility: %.2f, presence: %.2f" %(landmark.x, landmark.y,
    #               landmark.z, landmark.visibility, landmark.presence))
 
def draw_landmarks_on_image(rgb_image, detection_result, bg_black):
    # Black Background
    if bg_black:
        annotated_image = np.zeros_like(rgb_image)
    else:
        annotated_image = np.copy(rgb_image)
    
    # Because the asynchronous detect_async() is used, detection_result may be None for the
    # first few frames. Also, when there is no person in the image (frame), the
    # detection_result.pose_landmarks list is empty. Without handling these cases, an error occurs.
    if detection_result is None or detection_result.pose_landmarks == []:
        return annotated_image
    
    pose_landmarks_list = detection_result.pose_landmarks
        
    # Loop through the detected poses to visualize.
    for idx in range(len(pose_landmarks_list)):
        pose_landmarks = pose_landmarks_list[idx]
    
        # Draw the pose landmarks.
        pose_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        pose_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in pose_landmarks
        ])
        solutions.drawing_utils.draw_landmarks(
            annotated_image,
            pose_landmarks_proto,
            solutions.pose.POSE_CONNECTIONS,
            solutions.drawing_styles.get_default_pose_landmarks_style())
 
    return annotated_image
 
base_options = python.BaseOptions(model_asset_path='pose_landmarker_full.task')
options = vision.PoseLandmarkerOptions(base_options=base_options,running_mode=mp.tasks.vision.RunningMode.LIVE_STREAM,
                                       result_callback=print_result, output_segmentation_masks=False)
# The running mode of the task. Default to the image mode. PoseLandmarker has three running modes:
# 1) The image mode for detecting pose landmarks on single image inputs.
# 2) The video mode for detecting pose landmarks on the decoded frames of a video.
# 3) The live stream mode for detecting pose landmarks on the live stream of input data, such as from camera.
# In this mode, the "result_callback" below must be specified to receive the detection results asynchronously.
detector = vision.PoseLandmarker.create_from_options(options)
 
cap = cv2.VideoCapture(0)
 
while True:
    ret, cv_image = cap.read()
    if not ret:
        break
    frame = mp.Image(image_format = mp.ImageFormat.SRGB, data = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))
        
    # Sends live image data to perform pose landmarks detection.
    # The results will be available via the "result_callback" provided in the PoseLandmarkerOptions.
    # Only use this method when the PoseLandmarker is created with the live stream running mode.
    # The input timestamps should be monotonically increasing for adjacent calls of this method.
    # This method will return immediately after the input image is accepted. The results will be available via
    # the result_callback provided in the PoseLandmarkerOptions. The detect_async method is designed to process
    # live stream data such as camera input. To lower the overall latency, pose landmarker may drop the input
    # images if needed. In other words, it's not guaranteed to have output per input image.
    detector.detect_async(frame, int(time.time()*1000))
 
    annotated_image = draw_landmarks_on_image(frame.numpy_view(), landmark_result, True)
    cv2.imshow('sean', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))
    
    key = cv2.waitKey(25)
    if key == 27:  # ESC
        break
 
if cap.isOpened():
    cap.release()
cv2.destroyAllWindows()
detector.close()
 

 

 

 

This time, let's break the detected pose down by body part and display the position information for each part.

 

 

0 - nose
1 - left eye (inner)
2 - left eye
3 - left eye (outer)
4 - right eye (inner)
5 - right eye
6 - right eye (outer)
7 - left ear
8 - right ear
9 - mouth (left)
10 - mouth (right)
11 - left shoulder
12 - right shoulder
13 - left elbow
14 - right elbow
15 - left wrist
16 - right wrist
17 - left pinky
18 - right pinky
19 - left index
20 - right index
21 - left thumb
22 - right thumb
23 - left hip
24 - right hip
25 - left knee
26 - right knee
27 - left ankle
28 - right ankle
29 - left heel
30 - right heel
31 - left foot index
32 - right foot index

 

import time
import numpy as np
import cv2
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
 
landmark_result = None
 
def print_result(result: vision.PoseLandmarkerResult, output_image: mp.Image, timestamp_ms: int):
    global landmark_result
    landmark_result = result
 
    if result is None or result.pose_landmarks == []:
        return
 
    print("       Nose(0): (x: %.2f, y: %.2f, z: %5.2f, presense: %.2f, visibility: %.2f)"
          %(result.pose_landmarks[0][0].x, result.pose_landmarks[0][0].y, result.pose_landmarks[0][0].z,
            result.pose_landmarks[0][0].presence, result.pose_landmarks[0][0].visibility))
    print("Right Knee(26): (x: %.2f, y: %.2f, z: %5.2f, presense: %.2f, visibility: %.2f)"
          %(result.pose_landmarks[0][26].x, result.pose_landmarks[0][26].y, result.pose_landmarks[0][26].z,
            result.pose_landmarks[0][26].presence, result.pose_landmarks[0][26].visibility))
 
def draw_landmarks_on_image(rgb_image, detection_result, bg_black):    
    if bg_black:
        annotated_image = np.zeros_like(rgb_image)
    else:
        annotated_image = np.copy(rgb_image)
 
    if detection_result is None or detection_result.pose_landmarks == []:
        return annotated_image
    
    pose_landmarks_list = detection_result.pose_landmarks
 
    for idx in range(len(pose_landmarks_list)):
        pose_landmarks = pose_landmarks_list[idx]
 
        pose_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        pose_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in pose_landmarks
        ])
        solutions.drawing_utils.draw_landmarks(
            annotated_image,
            pose_landmarks_proto,
            solutions.pose.POSE_CONNECTIONS,
            solutions.drawing_styles.get_default_pose_landmarks_style())
 
    return annotated_image
 
base_options = python.BaseOptions(model_asset_path='pose_landmarker_full.task')
options = vision.PoseLandmarkerOptions(base_options=base_options,running_mode=mp.tasks.vision.RunningMode.LIVE_STREAM,
                                       result_callback=print_result, output_segmentation_masks=False)
detector = vision.PoseLandmarker.create_from_options(options)
 
cap = cv2.VideoCapture(0)
 
while True:
    ret, cv_image = cap.read()
    if not ret:
        break
    frame = mp.Image(image_format = mp.ImageFormat.SRGB, data = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))
 
    detector.detect_async(frame, int(time.time()*1000))
 
    annotated_image = draw_landmarks_on_image(frame.numpy_view(), landmark_result, True)
    cv2.imshow('sean', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))
    
    key = cv2.waitKey(25)
    if key == 27:  # ESC
        break
 
if cap.isOpened():
    cap.release()
cv2.destroyAllWindows()
detector.close()
 

 

 

Information for landmark 0 (nose) and landmark 26 (right knee) is displayed.

 

The output contains the following normalized coordinates (Landmarks); a small conversion sketch follows this list.

  • x and y: Landmark coordinates normalized between 0.0 and 1.0 by the image width (x) and height (y).
  • z: The landmark depth, with the depth at the midpoint of the hips as the origin. The smaller the value, the closer the landmark is to the camera. The magnitude of z uses roughly the same scale as x.
  • visibility: The likelihood of the landmark being visible within the image.
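
To make these normalized coordinates concrete, here is a minimal sketch that converts a landmark's normalized x and y into pixel coordinates. It is meant to be used with the example above: to_pixel is a helper name of my own, landmark_result is the global filled in by the callback, and cv_image is the current camera frame inside the while loop.

# Minimal sketch: define to_pixel() once near the top of the script, then use the
# if-block inside the while loop after detector.detect_async().
def to_pixel(landmark, frame_width, frame_height):
    # x and y are normalized to [0.0, 1.0] by the image width and height.
    return int(landmark.x * frame_width), int(landmark.y * frame_height)

if landmark_result is not None and landmark_result.pose_landmarks:
    height, width, _ = cv_image.shape                # current camera frame size
    nose = landmark_result.pose_landmarks[0][0]      # index 0 = nose
    print("Nose pixel position:", to_pixel(nose, width, height))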

 

※ Reference

A Tutorial on Finger Counting in Real-Time Video in Python with OpenCV and MediaPipe

 

 


Let's detect face landmarks using MediaPipe.

 

import numpy as np
import matplotlib.pyplot as plt
import cv2
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
 
def draw_landmarks_on_image(rgb_image, detection_result):
  face_landmarks_list = detection_result.face_landmarks
  annotated_image = np.copy(rgb_image)
 
  # Loop through the detected faces to visualize.
  for idx in range(len(face_landmarks_list)):
    face_landmarks = face_landmarks_list[idx]
 
    # Draw the face landmarks.
    face_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    face_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in face_landmarks
    ])
 
    solutions.drawing_utils.draw_landmarks(
        image=annotated_image,
        landmark_list=face_landmarks_proto,
        connections=mp.solutions.face_mesh.FACEMESH_TESSELATION,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp.solutions.drawing_styles
        .get_default_face_mesh_tesselation_style())
    solutions.drawing_utils.draw_landmarks(
        image=annotated_image,
        landmark_list=face_landmarks_proto,
        connections=mp.solutions.face_mesh.FACEMESH_CONTOURS,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp.solutions.drawing_styles
        .get_default_face_mesh_contours_style())
    solutions.drawing_utils.draw_landmarks(
        image=annotated_image,
        landmark_list=face_landmarks_proto,
        connections=mp.solutions.face_mesh.FACEMESH_IRISES,
          landmark_drawing_spec=None,
          connection_drawing_spec=mp.solutions.drawing_styles
          .get_default_face_mesh_iris_connections_style())
 
  return annotated_image
 
def plot_face_blendshapes_bar_graph(face_blendshapes):
  # Extract the face blendshapes category names and scores.
  face_blendshapes_names = [face_blendshapes_category.category_name for face_blendshapes_category in face_blendshapes]
  face_blendshapes_scores = [face_blendshapes_category.score for face_blendshapes_category in face_blendshapes]
  # The blendshapes are ordered in decreasing score value.
  face_blendshapes_ranks = range(len(face_blendshapes_names))
 
  fig, ax = plt.subplots(figsize=(12, 12))
  bar = ax.barh(face_blendshapes_ranks, face_blendshapes_scores, label=[str(x) for x in face_blendshapes_ranks])
  ax.set_yticks(face_blendshapes_ranks, face_blendshapes_names)
  ax.invert_yaxis()
 
  # Label each bar with values
  for score, patch in zip(face_blendshapes_scores, bar.patches):
    plt.text(patch.get_x() + patch.get_width(), patch.get_y(), f"{score:.4f}", va="top")
 
  ax.set_xlabel('Score')
  ax.set_title("Face Blendshapes")
  plt.tight_layout()
  plt.show()
 
# Create a FaceLandmarker object.
base_options = python.BaseOptions(model_asset_path='face_landmarker.task')
# https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker
options = vision.FaceLandmarkerOptions(base_options=base_options, output_face_blendshapes=True,
                                       output_facial_transformation_matrixes=True, num_faces=1)
detector = vision.FaceLandmarker.create_from_options(options)
 
# Load the input image.
image = mp.Image.create_from_file("face.jpg")
 
# Detect face landmarks from the input image.
detection_result = detector.detect(image)
 
# Process the detection result. In this case, visualize it.
annotated_image = draw_landmarks_on_image(image.numpy_view(), detection_result)
cv2.imshow('sean', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))
cv2.waitKey(0)
 

 

Model file: face_landmarker.task (3.58 MB)

 

Enter the source code and run it.

 

The original face.jpg image

 

The image analyzed with MediaPipe

 

The result contains not only the coordinates of each landmark but also facial expression data called blendshapes. Let's check it.

 

# The Face Landmarker returns a FaceLandmarkerResult object for each detection run. The result
# object contains a face mesh for each detected face, with coordinates for each face landmark.
# Optionally, the result object can also contain blendshapes, which denote facial expressions,
# and a facial transformation matrix to apply face effects on the detected landmarks.
score = []
for i in range(len(detection_result.face_blendshapes[0])):    
    score.append(detection_result.face_blendshapes[0][i].score)
score_sorted = np.sort(score)[::-1]  # [::-1] = descending order
score_sorted_index = np.argsort(score)[::-1]
 
for i in range(len(score_sorted)):
    if score_sorted[i] < 0.4:  # only print expressions with a score of 40% or higher
        break
    print("%d: %.2f, %s" %(i, score_sorted[i],
                           detection_result.face_blendshapes[0][score_sorted_index[i]].category_name))
 

 

 

 

The original image.

 

The image analyzed with MediaPipe

 

The facial expression data obtained from the landmark analysis is printed along with its probability.
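
The result also carries the facial transformation matrix requested with output_facial_transformation_matrixes=True in the options above. A minimal sketch for inspecting it, using the detection_result from the code above:

# Minimal sketch: print the 4x4 facial transformation matrix of the first detected face.
print(detection_result.facial_transformation_matrixes[0])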

 


Let's detect pose landmarks using MediaPipe.

 

import numpy as np
import cv2
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
 
def draw_landmarks_on_image(rgb_image, detection_result):
  pose_landmarks_list = detection_result.pose_landmarks
  annotated_image = np.copy(rgb_image)
 
  # Loop through the detected poses to visualize.
  for idx in range(len(pose_landmarks_list)):
    pose_landmarks = pose_landmarks_list[idx]
 
    # Draw the pose landmarks.
    pose_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    pose_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in pose_landmarks
    ])
    solutions.drawing_utils.draw_landmarks(
      annotated_image,
      pose_landmarks_proto,
      solutions.pose.POSE_CONNECTIONS,
      solutions.drawing_styles.get_default_pose_landmarks_style())
  return annotated_image
 
# Create a PoseLandmarker object.
base_options = python.BaseOptions(model_asset_path='pose_landmarker_full.task')
# https://ai.google.dev/edge/mediapipe/solutions/vision/pose_landmarker
options = vision.PoseLandmarkerOptions(base_options=base_options, output_segmentation_masks=True)
detector = vision.PoseLandmarker.create_from_options(options)
 
# Load the input image.
image = mp.Image.create_from_file("pose.jpg")
 
# Detect pose landmarks from the input image.
detection_result = detector.detect(image)
 
# Process the detection result. In this case, visualize it.
annotated_image = draw_landmarks_on_image(image.numpy_view(), detection_result)
cv2.imshow('sean', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))
cv2.waitKey(0)
 
# Visualize the pose segmentation mask.
# segmentation_mask = detection_result.segmentation_masks[0].numpy_view()
# visualized_mask = np.repeat(segmentation_mask[:, :, np.newaxis], 3, axis=2) * 255
# cv2.imshow('sean', visualized_mask)
# cv2.waitKey(0)
 

 

Model file: pose_landmarker_full.task (8.96 MB)

 

Enter the source code and run it.

 

The original pose.jpg image

 

The image processed with MediaPipe

 

 

0 - nose
1 - left eye (inner)
2 - left eye
3 - left eye (outer)
4 - right eye (inner)
5 - right eye
6 - right eye (outer)
7 - left ear
8 - right ear
9 - mouth (left)
10 - mouth (right)
11 - left shoulder
12 - right shoulder
13 - left elbow
14 - right elbow
15 - left wrist
16 - right wrist
17 - left pinky
18 - right pinky
19 - left index
20 - right index
21 - left thumb
22 - right thumb
23 - left hip
24 - right hip
25 - left knee
26 - right knee
27 - left ankle
28 - right ankle
29 - left heel
30 - right heel
31 - left foot index
32 - right foot index
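
As a side note, the legacy solutions module imported in the code above already exposes these indices as the PoseLandmark enum, so each index can be mapped to its name. A minimal sketch, assuming the detection_result produced by the code above:

# Minimal sketch: print every detected landmark with its index, name and coordinates.
for pose_landmarks in detection_result.pose_landmarks:
    for i, landmark in enumerate(pose_landmarks):
        name = solutions.pose.PoseLandmark(i).name   # e.g. NOSE, RIGHT_KNEE
        print("%2d %-16s x: %.2f, y: %.2f, z: %.2f" % (i, name, landmark.x, landmark.y, landmark.z))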

 


Let's detect faces (eyes, nose, mouth, ears) using MediaPipe.

 

from typing import Tuple, Union
import math
import numpy as np
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
 
MARGIN = 10  # pixels
ROW_SIZE = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
TEXT_COLOR = (255, 0, 0)  # red
 
def _normalized_to_pixel_coordinates(
    normalized_x: float, normalized_y: float, image_width: int,
    image_height: int) -> Union[None, Tuple[int, int]]:
  """Converts normalized value pair to pixel coordinates."""
 
  # Checks if the float value is between 0 and 1.
  def is_valid_normalized_value(value: float) -> bool:
    return (value > 0 or math.isclose(0, value)) and (value < 1 or
                                                      math.isclose(1, value))
 
  if not (is_valid_normalized_value(normalized_x) and
          is_valid_normalized_value(normalized_y)):
    # TODO: Draw coordinates even if it's outside of the image bounds.
    return None
  x_px = min(math.floor(normalized_x * image_width), image_width - 1)
  y_px = min(math.floor(normalized_y * image_height), image_height - 1)
  return x_px, y_px
 
def visualize(image, detection_result) -> np.ndarray:
  """Draws bounding boxes and keypoints on the input image and return it.
  Args:
    image: The input RGB image.
    detection_result: The list of all "Detection" entities to be visualize.
  Returns:
    Image with bounding boxes.
  """
  annotated_image = image.copy()
  height, width, _ = image.shape
 
  for detection in detection_result.detections:
    # Draw bounding_box
    bbox = detection.bounding_box
    start_point = bbox.origin_x, bbox.origin_y
    end_point = bbox.origin_x + bbox.width, bbox.origin_y + bbox.height
    cv2.rectangle(annotated_image, start_point, end_point, TEXT_COLOR, 3)
 
    # Draw keypoints
    for keypoint in detection.keypoints:
      keypoint_px = _normalized_to_pixel_coordinates(keypoint.x, keypoint.y, 
                                                     width, height)
      color, thickness, radius = (0, 255, 0), 2, 2
      cv2.circle(annotated_image, keypoint_px, thickness, color, radius)
 
    # Draw label and score
    category = detection.categories[0]
    category_name = category.category_name
    category_name = '' if category_name is None else category_name
    probability = round(category.score, 2)
    result_text = category_name + ' (' + str(probability) + ')'
    text_location = (MARGIN + bbox.origin_x,
                     MARGIN + ROW_SIZE + bbox.origin_y)
    cv2.putText(annotated_image, result_text, text_location, cv2.FONT_HERSHEY_PLAIN,
                FONT_SIZE, TEXT_COLOR, FONT_THICKNESS)
 
  return annotated_image
 
# Create a FaceDetector object.
base_options = python.BaseOptions(model_asset_path='blaze_face_short_range.tflite')
# https://ai.google.dev/edge/mediapipe/solutions/vision/face_detector
options = vision.FaceDetectorOptions(base_options=base_options)
detector = vision.FaceDetector.create_from_options(options)
 
# Load the input image.
image = mp.Image.create_from_file('face.jpg')
#cv_image = cv2.imread('face.jpg')
#image = mp.Image(image_format = mp.ImageFormat.SRGB,
#                 data = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))
# https://ai.google.dev/edge/api/mediapipe/python/mp/Image
 
# Detect faces in the input image.
detection_result = detector.detect(image)
 
# Process the detection result. In this case, visualize it.
image_copy = np.copy(image.numpy_view())
annotated_image = visualize(image_copy, detection_result)
rgb_annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
cv2.imshow('sean', rgb_annotated_image)
cv2.waitKey(0)
 

 

Model file: blaze_face_short_range.tflite (0.22 MB)

 

Enter the source code and run it.

 

The original face.jpg image

 

The image processed with MediaPipe

The eyes, nose, mouth, and ears are all detected accurately.

 


Let's detect hand landmarks using MediaPipe.

 

import numpy as np
import cv2
import mediapipe as mp
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
 
MARGIN = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
HANDEDNESS_TEXT_COLOR = (88, 205, 54)  # vibrant green
 
def draw_landmarks_on_image(rgb_image, detection_result):
  hand_landmarks_list = detection_result.hand_landmarks
  handedness_list = detection_result.handedness
  annotated_image = np.copy(rgb_image)
 
  # Loop through the detected hands to visualize.
  for idx in range(len(hand_landmarks_list)):
    hand_landmarks = hand_landmarks_list[idx]
    handedness = handedness_list[idx]
 
    # Draw the hand landmarks.
    hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    hand_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks
    ])
    solutions.drawing_utils.draw_landmarks(
      annotated_image,
      hand_landmarks_proto,
      solutions.hands.HAND_CONNECTIONS,
      solutions.drawing_styles.get_default_hand_landmarks_style(),
      solutions.drawing_styles.get_default_hand_connections_style())
 
    # Get the top left corner of the detected hand's bounding box.
    height, width, _ = annotated_image.shape
    x_coordinates = [landmark.x for landmark in hand_landmarks]
    y_coordinates = [landmark.y for landmark in hand_landmarks]
    text_x = int(min(x_coordinates) * width)
    text_y = int(min(y_coordinates) * height) - MARGIN
 
    # Draw handedness (left or right hand) on the image.
    cv2.putText(annotated_image, f"{handedness[0].category_name}", (text_x, text_y), cv2.FONT_HERSHEY_DUPLEX,
                FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)
 
  return annotated_image
 
# Create a HandLandmarker object.
base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
# https://ai.google.dev/edge/mediapipe/solutions/vision/hand_landmarker
options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=2)
detector = vision.HandLandmarker.create_from_options(options)
 
# Load the input image.
image = mp.Image.create_from_file("hand.jpg")
#cv_image = cv2.imread('hand.jpg')
#image = mp.Image(image_format = mp.ImageFormat.SRGB,
#                 data = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))
# https://ai.google.dev/edge/api/mediapipe/python/mp/Image
 
# Detect hand landmarks from the input image.
detection_result = detector.detect(image)
 
# Process the classification result. In this case, visualize it.
annotated_image = draw_landmarks_on_image(image.numpy_view(), detection_result)
cv2.imshow('sean', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))
cv2.waitKey(0)
 

 

Model file: hand_landmarker.task (7.46 MB)

 

Enter the source code and run it.

 

The original hand.jpg image

 

The image processed with MediaPipe

 


Let's detect objects using MediaPipe.

 

import numpy as np
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
 
MARGIN = 10  # pixels
ROW_SIZE = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
TEXT_COLOR = (255, 0, 0)  # red
 
def visualize(image, detection_result) -> np.ndarray:
  """Draws bounding boxes on the input image and return it.
  Args:
    image: The input RGB image.
    detection_result: The list of all "Detection" entities to be visualize.
  Returns:
    Image with bounding boxes.
  """
  for detection in detection_result.detections:
    # Draw bounding_box
    bbox = detection.bounding_box
    start_point = bbox.origin_x, bbox.origin_y
    end_point = bbox.origin_x + bbox.width, bbox.origin_y + bbox.height
    cv2.rectangle(image, start_point, end_point, TEXT_COLOR, 3)
 
    # Draw label and score
    category = detection.categories[0]
    category_name = category.category_name
    probability = round(category.score, 2)
    result_text = category_name + ' (' + str(probability) + ')'
    text_location = (MARGIN + bbox.origin_x, MARGIN + ROW_SIZE + bbox.origin_y)
    cv2.putText(image, result_text, text_location, cv2.FONT_HERSHEY_PLAIN,
                FONT_SIZE, TEXT_COLOR, FONT_THICKNESS)
 
  return image
 
# Create an ObjectDetector object.
base_options = python.BaseOptions(model_asset_path='efficientdet_lite2.tflite')
# https://ai.google.dev/edge/mediapipe/solutions/vision/object_detector
options = vision.ObjectDetectorOptions(base_options=base_options, score_threshold=0.5)
detector = vision.ObjectDetector.create_from_options(options)
 
# Load the input image.
image = mp.Image.create_from_file('image.jpg')
#cv_image = cv2.imread('image.jpg')
#image = mp.Image(image_format = mp.ImageFormat.SRGB,
#                 data = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))
# https://ai.google.dev/edge/api/mediapipe/python/mp/Image
 
# Detect objects in the input image.
detection_result = detector.detect(image)
 
# Process the detection result. In this case, visualize it.
image_copy = np.copy(image.numpy_view())
annotated_image = visualize(image_copy, detection_result)
rgb_annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
 
cv2.imshow('sean', rgb_annotated_image)
cv2.waitKey(0)
 

 

Model files: efficientdet_lite0.tflite (4.39 MB), efficientdet_lite2.tflite (7.17 MB)

 

Prepare several image files, enter the source code, and run it.

 

The dog and the cat are recognized accurately.

 

People and various objects around the office are detected.

 

Now let's build a detection model for objects we want to detect ourselves.

Object detection model customization guide

Overall, you can just follow the link above. Here, let's take a closer look at preparing the dataset.

 

The labels.json file is structured as follows.

{
  "categories":[
    {"id":1, "name":<cat1_name>},
    ...
  ],
  "images":[
    {"id":0, "file_name":"<img0>.<jpg/jpeg>"},
    ...
  ],
  "annotations":[
    {"id":0, "image_id":0, "category_id":1, "bbox":[x-top left, y-top left, width, height]},
    ...
  ]
}

 

Let's look at the entries for the first image (id 0) in an actual labels.json file.

{
  "images": [{"id": 0, "file_name": "IMG_0525.jpg"}, ...],
  "annotations": [{"image_id": 0, "bbox": [349, 61, 264, 351], "category_id": 2}, ...],
  "categories": [{"id": 0, "name": "background"}, {"id": 1, "name": "android"}, {"id": 2, "name": "pig_android"}]
}

 

Since this is the first file, image_id is set to 0, and since the object is a 'pig android' figure, category_id is set to 2.

bbox is the bounding box, given as [top-left x, top-left y, width, height].

In categories, id 0 must always be designated as background, even if it is never actually used.
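
As a quick sanity check on the annotations, the sketch below loads labels.json, looks up the annotation for the first image, and draws its bbox and category name with OpenCV. It assumes labels.json and IMG_0525.jpg are in the working directory; the drawing itself is not part of the customization guide.

import json
import cv2

# Minimal sketch: draw the bbox of the first image listed in labels.json to verify it.
with open("labels.json") as f:
    labels = json.load(f)

image_info = labels["images"][0]    # e.g. {"id": 0, "file_name": "IMG_0525.jpg"}
annotation = next(a for a in labels["annotations"] if a["image_id"] == image_info["id"])
category = next(c for c in labels["categories"] if c["id"] == annotation["category_id"])

x, y, w, h = annotation["bbox"]     # [top-left x, top-left y, width, height]
image = cv2.imread(image_info["file_name"])
cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 3)
cv2.putText(image, category["name"], (x, y - 10), cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 255), 2)

cv2.imshow("bbox check", image)
cv2.waitKey(0)
cv2.destroyAllWindows()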

 

Attached file: labels.json (0.01 MB)

 

 

IMG_0525.jpg

 

IMG_0525.jpg with its bbox drawn

 


Let's look at a few Keras-related errors and fix them.

 

1)

dense = keras.layers.Dense(10, activation='softmax', input_shape=(784, ))

Running the command above prints the following warning.

UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.

Since it is only a warning, you can ignore it and move on.


model = keras.Sequential(dense)
The command above then produces the following error.

TypeError: 'Dense' object is not iterable
Fix it by changing it as follows.
model = keras.Sequential([dense])

 

Alternatively, if you write it as follows from the start, it runs without any warnings or errors.
model = keras.Sequential([keras.Input(shape=(784, )), keras.layers.Dense(10, activation='softmax')])

 

2)

model.compile(loss='sparse_categorical_crossentropy', metrics='accuracy')

Running the command above produces the following error.

ValueError: Expected `metrics` argument to be a list, tuple, or dict. Received instead: metrics=accuracy of type <class 'str'>

Fix it by changing it as follows.
model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'])
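
Putting both fixes together, a minimal sketch of a model definition and compile step that runs without the warning or the errors might look like this (loading the data and calling fit() are left out):

import keras

# Minimal sketch combining the fixes above: keras.Input instead of input_shape,
# the layer wrapped in a list for Sequential, and metrics passed as a list.
model = keras.Sequential([
    keras.Input(shape=(784, )),
    keras.layers.Dense(10, activation='softmax')
])
model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()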

 

 

 

※ Reference

혼자 공부하는 머신러닝 + 딥러닝

 


[ML] MNIST pandas

AI, ML, DL · 2024. 12. 21. 17:24

Let's read and display the MNIST data with pandas.

 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#from sklearn.datasets import fetch_openml
 
#mnist = fetch_openml('mnist_784', as_frame=False)
#X, y = mnist.data, mnist.target
 
np.set_printoptions(linewidth=np.inf)
 
mnist = pd.read_csv("mnist_784.csv")
print("■ First 5 Data:")
print(mnist.iloc[0:5, 0:-1])
print("■ First 5 Targets:")
print(mnist.iloc[0:5, -1])
 
FirstImage = mnist.iloc[0, 0:-1].to_numpy().reshape(28, 28)
# to_numpy(): Return a NumPy representation of the DataFrame;
#             the axes labels are removed.
 
print("■ First Image:\n", FirstImage)
 
plt.imshow(FirstImage, cmap="binary")
plt.axis("off")
plt.show()
 

 

 

Even just printing the first data point, you can roughly make out the digit.

 

The first data point.

 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
 
np.set_printoptions(linewidth=np.inf)
 
mnist = pd.read_csv("mnist_784.csv")
X = mnist.iloc[:, :-1].to_numpy().reshape(-1, 28, 28)
y = mnist.iloc[:, -1].to_numpy()
 
print(X[0])
print("Target: ", y[0])
 

 

 

 

Error: the number of classes has to be greater than one; got 1 class

 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDClassifier
 
np.set_printoptions(linewidth=np.inf)
 
mnist = pd.read_csv("mnist_784.csv")
X = mnist.iloc[:, :-1].to_numpy()
y = mnist.iloc[:, -1].to_numpy().astype('str')
# If .astype('str') is removed, y holds numeric data. The y_train_5/y_test_5 comparisons
# below then compare against the string '5', so every element becomes False, the target has
# only the single False class, and the following error occurs:
# The number of classes has to be greater than one; got 1 class
# Alternatively, remove .astype('str') and compare against 5 instead of '5' in those comparisons.
 
first_digit = X[0]
# Select the first data point.
 
X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]
y_train_5 = (y_train == '5')
y_test_5 = (y_test == '5')
 
sgd_clf = SGDClassifier(random_state=42)
sgd_clf.fit(X_train, y_train_5)
print(sgd_clf.predict([first_digit]))
# Check whether the first data point is a 5.
 

 

Code that checks whether the first data point is a 5.

True is printed as the result.

 
