
Let's use the synchronous detect() function to detect a person's pose in the camera feed.

 

import numpy as np
import cv2
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
 
def draw_landmarks_on_image(rgb_image, detection_result, bg_black):
  pose_landmarks_list = detection_result.pose_landmarks
  # Black Background
  if bg_black:
      annotated_image = np.zeros_like(rgb_image)
  else:
      annotated_image = np.copy(rgb_image)
  
  # Loop through the detected poses to visualize.
  for idx in range(len(pose_landmarks_list)):
    pose_landmarks = pose_landmarks_list[idx]
 
    # Draw the pose landmarks.
    pose_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    pose_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in pose_landmarks
    ])
    solutions.drawing_utils.draw_landmarks(
      annotated_image,
      pose_landmarks_proto,
      solutions.pose.POSE_CONNECTIONS,
      solutions.drawing_styles.get_default_pose_landmarks_style())
  return annotated_image
 
# Create a PoseLandmarker object.
base_options = python.BaseOptions(model_asset_path='pose_landmarker_full.task')
options = vision.PoseLandmarkerOptions(base_options=base_options, output_segmentation_masks=True)
detector = vision.PoseLandmarker.create_from_options(options)
 
cap = cv2.VideoCapture(0)
 
while True:
    # Load the input frame.
    ret, cv_frame = cap.read()
    if not ret:
        break
    frame = mp.Image(image_format = mp.ImageFormat.SRGB, data = cv2.cvtColor(cv_frame, cv2.COLOR_BGR2RGB))
 
    # Detect pose landmarks from the input image.
    detection_result = detector.detect(frame)
    
    # Process the detection result. In this case, visualize it.
    annotated_frame = draw_landmarks_on_image(frame.numpy_view(), detection_result, True)
    cv2.imshow('sean', cv2.cvtColor(annotated_frame, cv2.COLOR_RGB2BGR))
 
    key = cv2.waitKey(25)
    if key == 27:  # ESC
        break
 
if cap.isOpened():
    cap.release()
cv2.destroyAllWindows()
 

 

This time, let's detect the pose using the asynchronous detect_async().

 

import time
import numpy as np
import cv2
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
 
landmark_result = None
 
# The user-defined result callback for processing live stream data.
# The result callback should only be specified when the running mode is set to the live stream mode.
# The result_callback provides:
# The pose landmarker detection results.
# The input image that the pose landmarker runs on.
# The input timestamp in milliseconds.
def print_result(result: vision.PoseLandmarkerResult, output_image: mp.Image, timestamp_ms: int):
    global landmark_result
    landmark_result = result
 
    #print(output_image.numpy_view())
    # output_image can be accessed here, but displaying it with cv2.imshow() from inside
    # this callback does not seem to work; I tried several approaches and none worked properly.
        
    # Structure of PoseLandmakerResult
    # mp.tasks.vision.PoseLandmarkerResult(
    # pose_landmarks: List[List[landmark_module.NormalizedLandmark]],
    # pose_world_landmarks: List[List[landmark_module.Landmark]],
    # segmentation_masks: Optional[List[image_module.Image]] = None
    # )
    
    #print('pose landmarker result: {}'.format(result))
    #print("pose landmark: ", result.pose_landmarks[0][0].visibility)
    #print("pose world landmark: ", result.pose_world_landmarks[0][0].visibility)
 
    # pose_landmarks_list = result.pose_landmarks    
    # for idx in range(len(pose_landmarks_list)):
    #     pose_landmarks = pose_landmarks_list[idx]        
    #     for landmark in pose_landmarks:
    #         print("x: %.2f, y: %.2f, z: %.2f visibility: %.2f, presence: %.2f" %(landmark.x, landmark.y,
    #               landmark.z, landmark.visibility, landmark.presence))
 
def draw_landmarks_on_image(rgb_image, detection_result, bg_black):
    # Black Background
    if bg_black:
        annotated_image = np.zeros_like(rgb_image)
    else:
        annotated_image = np.copy(rgb_image)
    
    # Because the asynchronous detect_async() is used, detection_result may be None for the
    # first few frames. Also, when there is no person in the image (frame), the
    # detection_result.pose_landmarks list is empty. Without handling these cases, an error occurs.
    if detection_result is None or detection_result.pose_landmarks == []:
        return annotated_image
    
    pose_landmarks_list = detection_result.pose_landmarks
        
    # Loop through the detected poses to visualize.
    for idx in range(len(pose_landmarks_list)):
        pose_landmarks = pose_landmarks_list[idx]
    
        # Draw the pose landmarks.
        pose_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        pose_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in pose_landmarks
        ])
        solutions.drawing_utils.draw_landmarks(
            annotated_image,
            pose_landmarks_proto,
            solutions.pose.POSE_CONNECTIONS,
            solutions.drawing_styles.get_default_pose_landmarks_style())
 
    return annotated_image
 
base_options = python.BaseOptions(model_asset_path='pose_landmarker_full.task')
options = vision.PoseLandmarkerOptions(base_options=base_options,running_mode=mp.tasks.vision.RunningMode.LIVE_STREAM,
                                       result_callback=print_result, output_segmentation_masks=False)
# The running mode of the task. Default to the image mode. PoseLandmarker has three running modes:
# 1) The image mode for detecting pose landmarks on single image inputs.
# 2) The video mode for detecting pose landmarks on the decoded frames of a video.
# 3) The live stream mode for detecting pose landmarks on the live stream of input data, such as from camera.
# In this mode, the "result_callback" below must be specified to receive the detection results asynchronously.
detector = vision.PoseLandmarker.create_from_options(options)
 
cap = cv2.VideoCapture(0)
 
while True:
    ret, cv_image = cap.read()
    if not ret:
        break
    frame = mp.Image(image_format = mp.ImageFormat.SRGB, data = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))
        
    # Sends live image data to perform pose landmarks detection.
    # The results will be available via the "result_callback" provided in the PoseLandmarkerOptions.
    # Only use this method when the PoseLandmarker is created with the live stream running mode.
    # The input timestamps should be monotonically increasing for adjacent calls of this method.
    # This method will return immediately after the input image is accepted. The results will be available via
    # the result_callback provided in the PoseLandmarkerOptions. The detect_async method is designed to process
    # live stream data such as camera input. To lower the overall latency, pose landmarker may drop the input
    # images if needed. In other words, it's not guaranteed to have output per input image.
    detector.detect_async(frame, int(time.time()*1000))
 
    annotated_image = draw_landmarks_on_image(frame.numpy_view(), landmark_result, True)
    cv2.imshow('sean', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))
    
    key = cv2.waitKey(25)
    if key == 27:  # ESC
        break
 
if cap.isOpened():
    cap.release()
cv2.destroyAllWindows()
detector.close()
 

 

 

 

This time, let's break the detected pose down by body part and display the position information for each part.

 

 

0 - nose
1 - left eye (inner)
2 - left eye
3 - left eye (outer)
4 - right eye (inner)
5 - right eye
6 - right eye (outer)
7 - left ear
8 - right ear
9 - mouth (left)
10 - mouth (right)
11 - left shoulder
12 - right shoulder
13 - left elbow
14 - right elbow
15 - left wrist
16 - right wrist
17 - left pinky
18 - right pinky
19 - left index
20 - right index
21 - left thumb
22 - right thumb
23 - left hip
24 - right hip
25 - left knee
26 - right knee
27 - left ankle
28 - right ankle
29 - left heel
30 - right heel
31 - left foot index
32 - right foot index

 

import time
import numpy as np
import cv2
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
 
landmark_result = None
 
def print_result(result: vision.PoseLandmarkerResult, output_image: mp.Image, timestamp_ms: int):
    global landmark_result
    landmark_result = result
 
    if result is None or result.pose_landmarks == []:
        return
 
    print("       Nose(0): (x: %.2f, y: %.2f, z: %5.2f, presense: %.2f, visibility: %.2f)"
          %(result.pose_landmarks[0][0].x, result.pose_landmarks[0][0].y, result.pose_landmarks[0][0].z,
            result.pose_landmarks[0][0].presence, result.pose_landmarks[0][0].visibility))
    print("Right Knee(26): (x: %.2f, y: %.2f, z: %5.2f, presense: %.2f, visibility: %.2f)"
          %(result.pose_landmarks[0][26].x, result.pose_landmarks[0][26].y, result.pose_landmarks[0][26].z,
            result.pose_landmarks[0][26].presence, result.pose_landmarks[0][26].visibility))
 
def draw_landmarks_on_image(rgb_image, detection_result, bg_black):    
    if bg_black:
        annotated_image = np.zeros_like(rgb_image)
    else:
        annotated_image = np.copy(rgb_image)
 
    if detection_result is None or detection_result.pose_landmarks == []:
        return annotated_image
    
    pose_landmarks_list = detection_result.pose_landmarks
 
    for idx in range(len(pose_landmarks_list)):
        pose_landmarks = pose_landmarks_list[idx]
 
        pose_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        pose_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in pose_landmarks
        ])
        solutions.drawing_utils.draw_landmarks(
            annotated_image,
            pose_landmarks_proto,
            solutions.pose.POSE_CONNECTIONS,
            solutions.drawing_styles.get_default_pose_landmarks_style())
 
    return annotated_image
 
base_options = python.BaseOptions(model_asset_path='pose_landmarker_full.task')
options = vision.PoseLandmarkerOptions(base_options=base_options,running_mode=mp.tasks.vision.RunningMode.LIVE_STREAM,
                                       result_callback=print_result, output_segmentation_masks=False)
detector = vision.PoseLandmarker.create_from_options(options)
 
cap = cv2.VideoCapture(0)
 
while True:
    ret, cv_image = cap.read()
    if not ret:
        break
    frame = mp.Image(image_format = mp.ImageFormat.SRGB, data = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))
 
    detector.detect_async(frame, int(time.time()*1000))
 
    annotated_image = draw_landmarks_on_image(frame.numpy_view(), landmark_result, True)
    cv2.imshow('sean', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))
    
    key = cv2.waitKey(25)
    if key == 27:  # ESC
        break
 
if cap.isOpened():
    cap.release()
cv2.destroyAllWindows()
detector.close()
 

 

 

Information for landmark 0 (nose) and landmark 26 (right knee) is displayed.

 

The output contains the following normalized coordinates (Landmarks); a small conversion sketch follows this list.

  • x and y: Landmark coordinates normalized between 0.0 and 1.0 by the image width (x) and height (y).
  • z: The landmark depth, with the depth at the midpoint of the hips as the origin. The smaller the value, the closer the landmark is to the camera. The magnitude of z uses roughly the same scale as x.
  • visibility: The likelihood of the landmark being visible within the image.
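
To make these normalized coordinates concrete, here is a minimal sketch that converts a landmark's normalized x and y into pixel coordinates. It is meant to be used with the example above: to_pixel is a helper name of my own, landmark_result is the global filled in by the callback, and cv_image is the current camera frame inside the while loop.

# Minimal sketch: define to_pixel() once near the top of the script, then use the
# if-block inside the while loop after detector.detect_async().
def to_pixel(landmark, frame_width, frame_height):
    # x and y are normalized to [0.0, 1.0] by the image width and height.
    return int(landmark.x * frame_width), int(landmark.y * frame_height)

if landmark_result is not None and landmark_result.pose_landmarks:
    height, width, _ = cv_image.shape                # current camera frame size
    nose = landmark_result.pose_landmarks[0][0]      # index 0 = nose
    print("Nose pixel position:", to_pixel(nose, width, height))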

 

※ Reference

A Tutorial on Finger Counting in Real-Time Video in Python with OpenCV and MediaPipe

 

 


Let's detect face landmarks using MediaPipe.

 

import numpy as np
import matplotlib.pyplot as plt
import cv2
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
 
def draw_landmarks_on_image(rgb_image, detection_result):
  face_landmarks_list = detection_result.face_landmarks
  annotated_image = np.copy(rgb_image)
 
  # Loop through the detected faces to visualize.
  for idx in range(len(face_landmarks_list)):
    face_landmarks = face_landmarks_list[idx]
 
    # Draw the face landmarks.
    face_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    face_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in face_landmarks
    ])
 
    solutions.drawing_utils.draw_landmarks(
        image=annotated_image,
        landmark_list=face_landmarks_proto,
        connections=mp.solutions.face_mesh.FACEMESH_TESSELATION,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp.solutions.drawing_styles
        .get_default_face_mesh_tesselation_style())
    solutions.drawing_utils.draw_landmarks(
        image=annotated_image,
        landmark_list=face_landmarks_proto,
        connections=mp.solutions.face_mesh.FACEMESH_CONTOURS,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp.solutions.drawing_styles
        .get_default_face_mesh_contours_style())
    solutions.drawing_utils.draw_landmarks(
        image=annotated_image,
        landmark_list=face_landmarks_proto,
        connections=mp.solutions.face_mesh.FACEMESH_IRISES,
          landmark_drawing_spec=None,
          connection_drawing_spec=mp.solutions.drawing_styles
          .get_default_face_mesh_iris_connections_style())
 
  return annotated_image
 
def plot_face_blendshapes_bar_graph(face_blendshapes):
  # Extract the face blendshapes category names and scores.
  face_blendshapes_names = [face_blendshapes_category.category_name for face_blendshapes_category in face_blendshapes]
  face_blendshapes_scores = [face_blendshapes_category.score for face_blendshapes_category in face_blendshapes]
  # The blendshapes are ordered in decreasing score value.
  face_blendshapes_ranks = range(len(face_blendshapes_names))
 
  fig, ax = plt.subplots(figsize=(12, 12))
  bar = ax.barh(face_blendshapes_ranks, face_blendshapes_scores, label=[str(x) for x in face_blendshapes_ranks])
  ax.set_yticks(face_blendshapes_ranks, face_blendshapes_names)
  ax.invert_yaxis()
 
  # Label each bar with values
  for score, patch in zip(face_blendshapes_scores, bar.patches):
    plt.text(patch.get_x() + patch.get_width(), patch.get_y(), f"{score:.4f}", va="top")
 
  ax.set_xlabel('Score')
  ax.set_title("Face Blendshapes")
  plt.tight_layout()
  plt.show()
 
# Create a FaceLandmarker object.
base_options = python.BaseOptions(model_asset_path='face_landmarker.task')
# https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker
options = vision.FaceLandmarkerOptions(base_options=base_options, output_face_blendshapes=True,
                                       output_facial_transformation_matrixes=True, num_faces=1)
detector = vision.FaceLandmarker.create_from_options(options)
 
# Load the input image.
image = mp.Image.create_from_file("face.jpg")
 
# Detect face landmarks from the input image.
detection_result = detector.detect(image)
 
# Process the detection result. In this case, visualize it.
annotated_image = draw_landmarks_on_image(image.numpy_view(), detection_result)
cv2.imshow('sean', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))
cv2.waitKey(0)
 

 

Model file: face_landmarker.task (3.58 MB)

 

Enter the source code and run it.

 

The original face.jpg image

 

The image analyzed with MediaPipe

 

The result contains not only the coordinates of each landmark but also facial expression data called blendshapes. Let's check it.

 

# The Face Landmarker returns a FaceLandmarkerResult object for each detection run. The result
# object contains a face mesh for each detected face, with coordinates for each face landmark.
# Optionally, the result object can also contain blendshapes, which denote facial expressions,
# and a facial transformation matrix to apply face effects on the detected landmarks.
score = []
for i in range(len(detection_result.face_blendshapes[0])):    
    score.append(detection_result.face_blendshapes[0][i].score)
score_sorted = np.sort(score)[::-1]  # [::-1] = descending order
score_sorted_index = np.argsort(score)[::-1]
 
for i in range(len(score_sorted)):
    if score_sorted[i] < 0.4:  # only print expressions with a score of 40% or higher
        break
    print("%d: %.2f, %s" %(i, score_sorted[i],
                           detection_result.face_blendshapes[0][score_sorted_index[i]].category_name))
 

 

 

 

The original image.

 

The image analyzed with MediaPipe

 

The facial expression data obtained from the landmark analysis is printed along with its probability.
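
The result also carries the facial transformation matrix requested with output_facial_transformation_matrixes=True in the options above. A minimal sketch for inspecting it, using the detection_result from the code above:

# Minimal sketch: print the 4x4 facial transformation matrix of the first detected face.
print(detection_result.facial_transformation_matrixes[0])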

 


Let's detect pose landmarks using MediaPipe.

 

import numpy as np
import cv2
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
 
def draw_landmarks_on_image(rgb_image, detection_result):
  pose_landmarks_list = detection_result.pose_landmarks
  annotated_image = np.copy(rgb_image)
 
  # Loop through the detected poses to visualize.
  for idx in range(len(pose_landmarks_list)):
    pose_landmarks = pose_landmarks_list[idx]
 
    # Draw the pose landmarks.
    pose_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    pose_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in pose_landmarks
    ])
    solutions.drawing_utils.draw_landmarks(
      annotated_image,
      pose_landmarks_proto,
      solutions.pose.POSE_CONNECTIONS,
      solutions.drawing_styles.get_default_pose_landmarks_style())
  return annotated_image
 
# Create a PoseLandmarker object.
base_options = python.BaseOptions(model_asset_path='pose_landmarker_full.task')
# https://ai.google.dev/edge/mediapipe/solutions/vision/pose_landmarker
options = vision.PoseLandmarkerOptions(base_options=base_options, output_segmentation_masks=True)
detector = vision.PoseLandmarker.create_from_options(options)
 
# Load the input image.
image = mp.Image.create_from_file("pose.jpg")
 
# Detect pose landmarks from the input image.
detection_result = detector.detect(image)
 
# Process the detection result. In this case, visualize it.
annotated_image = draw_landmarks_on_image(image.numpy_view(), detection_result)
cv2.imshow('sean', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))
cv2.waitKey(0)
 
# Visualize the pose segmentation mask.
# segmentation_mask = detection_result.segmentation_masks[0].numpy_view()
# visualized_mask = np.repeat(segmentation_mask[:, :, np.newaxis], 3, axis=2) * 255
# cv2.imshow('sean', visualized_mask)
# cv2.waitKey(0)
 

 

Model file: pose_landmarker_full.task (8.96 MB)

 

Enter the source code and run it.

 

The original pose.jpg image

 

The image processed with MediaPipe

 

 

0 - nose
1 - left eye (inner)
2 - left eye
3 - left eye (outer)
4 - right eye (inner)
5 - right eye
6 - right eye (outer)
7 - left ear
8 - right ear
9 - mouth (left)
10 - mouth (right)
11 - left shoulder
12 - right shoulder
13 - left elbow
14 - right elbow
15 - left wrist
16 - right wrist
17 - left pinky
18 - right pinky
19 - left index
20 - right index
21 - left thumb
22 - right thumb
23 - left hip
24 - right hip
25 - left knee
26 - right knee
27 - left ankle
28 - right ankle
29 - left heel
30 - right heel
31 - left foot index
32 - right foot index
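
As a side note, the legacy solutions module imported in the code above already exposes these indices as the PoseLandmark enum, so each index can be mapped to its name. A minimal sketch, assuming the detection_result produced by the code above:

# Minimal sketch: print every detected landmark with its index, name and coordinates.
for pose_landmarks in detection_result.pose_landmarks:
    for i, landmark in enumerate(pose_landmarks):
        name = solutions.pose.PoseLandmark(i).name   # e.g. NOSE, RIGHT_KNEE
        print("%2d %-16s x: %.2f, y: %.2f, z: %.2f" % (i, name, landmark.x, landmark.y, landmark.z))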

 


Let's detect faces (eyes, nose, mouth, ears) using MediaPipe.

 

from typing import Tuple, Union
import math
import numpy as np
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
 
MARGIN = 10  # pixels
ROW_SIZE = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
TEXT_COLOR = (255, 0, 0)  # red
 
def _normalized_to_pixel_coordinates(
    normalized_x: float, normalized_y: float, image_width: int,
    image_height: int) -> Union[None, Tuple[int, int]]:
  """Converts normalized value pair to pixel coordinates."""
 
  # Checks if the float value is between 0 and 1.
  def is_valid_normalized_value(value: float) -> bool:
    return (value > 0 or math.isclose(0, value)) and (value < 1 or
                                                      math.isclose(1, value))
 
  if not (is_valid_normalized_value(normalized_x) and
          is_valid_normalized_value(normalized_y)):
    # TODO: Draw coordinates even if it's outside of the image bounds.
    return None
  x_px = min(math.floor(normalized_x * image_width), image_width - 1)
  y_px = min(math.floor(normalized_y * image_height), image_height - 1)
  return x_px, y_px
 
def visualize(image, detection_result) -> np.ndarray:
  """Draws bounding boxes and keypoints on the input image and return it.
  Args:
    image: The input RGB image.
    detection_result: The list of all "Detection" entities to be visualize.
  Returns:
    Image with bounding boxes.
  """
  annotated_image = image.copy()
  height, width, _ = image.shape
 
  for detection in detection_result.detections:
    # Draw bounding_box
    bbox = detection.bounding_box
    start_point = bbox.origin_x, bbox.origin_y
    end_point = bbox.origin_x + bbox.width, bbox.origin_y + bbox.height
    cv2.rectangle(annotated_image, start_point, end_point, TEXT_COLOR, 3)
 
    # Draw keypoints
    for keypoint in detection.keypoints:
      keypoint_px = _normalized_to_pixel_coordinates(keypoint.x, keypoint.y, 
                                                     width, height)
      color, thickness, radius = (0, 255, 0), 2, 2
      cv2.circle(annotated_image, keypoint_px, thickness, color, radius)
 
    # Draw label and score
    category = detection.categories[0]
    category_name = category.category_name
    category_name = '' if category_name is None else category_name
    probability = round(category.score, 2)
    result_text = category_name + ' (' + str(probability) + ')'
    text_location = (MARGIN + bbox.origin_x,
                     MARGIN + ROW_SIZE + bbox.origin_y)
    cv2.putText(annotated_image, result_text, text_location, cv2.FONT_HERSHEY_PLAIN,
                FONT_SIZE, TEXT_COLOR, FONT_THICKNESS)
 
  return annotated_image
 
# Create a FaceDetector object.
base_options = python.BaseOptions(model_asset_path='blaze_face_short_range.tflite')
# https://ai.google.dev/edge/mediapipe/solutions/vision/face_detector
options = vision.FaceDetectorOptions(base_options=base_options)
detector = vision.FaceDetector.create_from_options(options)
 
# Load the input image.
image = mp.Image.create_from_file('face.jpg')
#cv_image = cv2.imread('face.jpg')
#image = mp.Image(image_format = mp.ImageFormat.SRGB,
#                 data = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))
# https://ai.google.dev/edge/api/mediapipe/python/mp/Image
 
# Detect faces in the input image.
detection_result = detector.detect(image)
 
# Process the detection result. In this case, visualize it.
image_copy = np.copy(image.numpy_view())
annotated_image = visualize(image_copy, detection_result)
rgb_annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
cv2.imshow('sean', rgb_annotated_image)
cv2.waitKey(0)
 

 

Model file: blaze_face_short_range.tflite (0.22 MB)

 

Enter the source code and run it.

 

The original face.jpg image

 

The image processed with MediaPipe

The eyes, nose, mouth, and ears are all detected accurately.

 


Let's detect hand landmarks using MediaPipe.

 

import numpy as np
import cv2
import mediapipe as mp
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
 
MARGIN = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
HANDEDNESS_TEXT_COLOR = (88, 205, 54)  # vibrant green
 
def draw_landmarks_on_image(rgb_image, detection_result):
  hand_landmarks_list = detection_result.hand_landmarks
  handedness_list = detection_result.handedness
  annotated_image = np.copy(rgb_image)
 
  # Loop through the detected hands to visualize.
  for idx in range(len(hand_landmarks_list)):
    hand_landmarks = hand_landmarks_list[idx]
    handedness = handedness_list[idx]
 
    # Draw the hand landmarks.
    hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    hand_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks
    ])
    solutions.drawing_utils.draw_landmarks(
      annotated_image,
      hand_landmarks_proto,
      solutions.hands.HAND_CONNECTIONS,
      solutions.drawing_styles.get_default_hand_landmarks_style(),
      solutions.drawing_styles.get_default_hand_connections_style())
 
    # Get the top left corner of the detected hand's bounding box.
    height, width, _ = annotated_image.shape
    x_coordinates = [landmark.x for landmark in hand_landmarks]
    y_coordinates = [landmark.y for landmark in hand_landmarks]
    text_x = int(min(x_coordinates) * width)
    text_y = int(min(y_coordinates) * height) - MARGIN
 
    # Draw handedness (left or right hand) on the image.
    cv2.putText(annotated_image, f"{handedness[0].category_name}", (text_x, text_y), cv2.FONT_HERSHEY_DUPLEX,
                FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)
 
  return annotated_image
 
# Create a HandLandmarker object.
base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
# https://ai.google.dev/edge/mediapipe/solutions/vision/hand_landmarker
options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=2)
detector = vision.HandLandmarker.create_from_options(options)
 
# Load the input image.
image = mp.Image.create_from_file("hand.jpg")
#cv_image = cv2.imread('hand.jpg')
#image = mp.Image(image_format = mp.ImageFormat.SRGB,
#                 data = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))
# https://ai.google.dev/edge/api/mediapipe/python/mp/Image
 
# Detect hand landmarks from the input image.
detection_result = detector.detect(image)
 
# Process the classification result. In this case, visualize it.
annotated_image = draw_landmarks_on_image(image.numpy_view(), detection_result)
cv2.imshow('sean', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))
cv2.waitKey(0)
 

 

Model file: hand_landmarker.task (7.46 MB)

 

Enter the source code and run it.

 

The original hand.jpg image

 

The image processed with MediaPipe

 


Let's detect objects using MediaPipe.

 

import numpy as np
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
 
MARGIN = 10  # pixels
ROW_SIZE = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
TEXT_COLOR = (255, 0, 0)  # red
 
def visualize(image, detection_result) -> np.ndarray:
  """Draws bounding boxes on the input image and return it.
  Args:
    image: The input RGB image.
    detection_result: The list of all "Detection" entities to be visualize.
  Returns:
    Image with bounding boxes.
  """
  for detection in detection_result.detections:
    # Draw bounding_box
    bbox = detection.bounding_box
    start_point = bbox.origin_x, bbox.origin_y
    end_point = bbox.origin_x + bbox.width, bbox.origin_y + bbox.height
    cv2.rectangle(image, start_point, end_point, TEXT_COLOR, 3)
 
    # Draw label and score
    category = detection.categories[0]
    category_name = category.category_name
    probability = round(category.score, 2)
    result_text = category_name + ' (' + str(probability) + ')'
    text_location = (MARGIN + bbox.origin_x, MARGIN + ROW_SIZE + bbox.origin_y)
    cv2.putText(image, result_text, text_location, cv2.FONT_HERSHEY_PLAIN,
                FONT_SIZE, TEXT_COLOR, FONT_THICKNESS)
 
  return image
 
# Create an ObjectDetector object.
base_options = python.BaseOptions(model_asset_path='efficientdet_lite2.tflite')
# https://ai.google.dev/edge/mediapipe/solutions/vision/object_detector
options = vision.ObjectDetectorOptions(base_options=base_options, score_threshold=0.5)
detector = vision.ObjectDetector.create_from_options(options)
 
# Load the input image.
image = mp.Image.create_from_file('image.jpg')
#cv_image = cv2.imread('image.jpg')
#image = mp.Image(image_format = mp.ImageFormat.SRGB,
#                 data = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))
# https://ai.google.dev/edge/api/mediapipe/python/mp/Image
 
# Detect objects in the input image.
detection_result = detector.detect(image)
 
# Process the detection result. In this case, visualize it.
image_copy = np.copy(image.numpy_view())
annotated_image = visualize(image_copy, detection_result)
rgb_annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
 
cv2.imshow('sean', rgb_annotated_image)
cv2.waitKey(0)
 

 

Model files: efficientdet_lite0.tflite (4.39 MB), efficientdet_lite2.tflite (7.17 MB)

 

Prepare several image files, enter the source code, and run it.

 

The dog and the cat are recognized accurately.

 

People and various objects around the office are detected.

 

Now let's build a detection model for objects we want to detect ourselves.

Object detection model customization guide

Overall, you can just follow the link above. Here, let's take a closer look at preparing the dataset.

 

The labels.json file is structured as follows.

{
  "categories":[
    {"id":1, "name":<cat1_name>},
    ...
  ],
  "images":[
    {"id":0, "file_name":"<img0>.<jpg/jpeg>"},
    ...
  ],
  "annotations":[
    {"id":0, "image_id":0, "category_id":1, "bbox":[x-top left, y-top left, width, height]},
    ...
  ]
}

 

Let's look at the entries for the first image (id 0) in an actual labels.json file.

{
  "images": [{"id": 0, "file_name": "IMG_0525.jpg"}, ...],
  "annotations": [{"image_id": 0, "bbox": [349, 61, 264, 351], "category_id": 2}, ...],
  "categories": [{"id": 0, "name": "background"}, {"id": 1, "name": "android"}, {"id": 2, "name": "pig_android"}]
}

 

Since this is the first file, image_id is set to 0, and since the object is a 'pig android' figure, category_id is set to 2.

bbox is the bounding box, given as [top-left x, top-left y, width, height].

In categories, id 0 must always be designated as background, even if it is never actually used.
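
As a quick sanity check on the annotations, the sketch below loads labels.json, looks up the annotation for the first image, and draws its bbox and category name with OpenCV. It assumes labels.json and IMG_0525.jpg are in the working directory; the drawing itself is not part of the customization guide.

import json
import cv2

# Minimal sketch: draw the bbox of the first image listed in labels.json to verify it.
with open("labels.json") as f:
    labels = json.load(f)

image_info = labels["images"][0]    # e.g. {"id": 0, "file_name": "IMG_0525.jpg"}
annotation = next(a for a in labels["annotations"] if a["image_id"] == image_info["id"])
category = next(c for c in labels["categories"] if c["id"] == annotation["category_id"])

x, y, w, h = annotation["bbox"]     # [top-left x, top-left y, width, height]
image = cv2.imread(image_info["file_name"])
cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 3)
cv2.putText(image, category["name"], (x, y - 10), cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 255), 2)

cv2.imshow("bbox check", image)
cv2.waitKey(0)
cv2.destroyAllWindows()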

 

Attached file: labels.json (0.01 MB)

 

 

IMG_0525.jpg

 

IMG_0525.jpg with its bbox drawn

 


Let's look at a few Keras-related errors and fix them.

 

1)

dense = keras.layers.Dense(10, activation='softmax', input_shape=(784, ))

Running the command above prints the following warning.

UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.

Since it is only a warning, you can ignore it and move on.


model = keras.Sequential(dense)
The command above then produces the following error.

TypeError: 'Dense' object is not iterable
Fix it by changing it as follows.
model = keras.Sequential([dense])

 

Alternatively, if you write it as follows from the start, it runs without any warnings or errors.
model = keras.Sequential([keras.Input(shape=(784, )), keras.layers.Dense(10, activation='softmax')])

 

2)

model.compile(loss='sparse_categorical_crossentropy', metrics='accuracy')

Running the command above produces the following error.

ValueError: Expected `metrics` argument to be a list, tuple, or dict. Received instead: metrics=accuracy of type <class 'str'>

Fix it by changing it as follows.
model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'])
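
Putting both fixes together, a minimal sketch of a model definition and compile step that runs without the warning or the errors might look like this (loading the data and calling fit() are left out):

import keras

# Minimal sketch combining the fixes above: keras.Input instead of input_shape,
# the layer wrapped in a list for Sequential, and metrics passed as a list.
model = keras.Sequential([
    keras.Input(shape=(784, )),
    keras.layers.Dense(10, activation='softmax')
])
model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()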

 

 

 

※ Reference

혼자 공부하는 머신러닝 + 딥러닝

 


[ML] MNIST pandas

AI, ML, DL · 2024. 12. 21. 17:24

Let's read and display the MNIST data with pandas.

 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#from sklearn.datasets import fetch_openml
 
#mnist = fetch_openml('mnist_784', as_frame=False)
#X, y = mnist.data, mnist.target
 
np.set_printoptions(linewidth=np.inf)
 
mnist = pd.read_csv("mnist_784.csv")
print("■ First 5 Data:")
print(mnist.iloc[0:5, 0:-1])
print("■ First 5 Targets:")
print(mnist.iloc[0:5, -1])
 
FirstImage = mnist.iloc[0, 0:-1].to_numpy().reshape(28, 28)
# to_numpy(): Return a NumPy representation of the DataFrame;
#             the axes labels are removed.
 
print("■ First Image:\n", FirstImage)
 
plt.imshow(FirstImage, cmap="binary")
plt.axis("off")
plt.show()
 

 

 

Even just printing the first data point, you can roughly make out the digit.

 

The first data point.

 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
 
np.set_printoptions(linewidth=np.inf)
 
mnist = pd.read_csv("mnist_784.csv")
X = mnist.iloc[:, :-1].to_numpy().reshape(-1, 28, 28)
y = mnist.iloc[:, -1].to_numpy()
 
print(X[0])
print("Target: ", y[0])
 

 

 

 

Error: the number of classes has to be greater than one; got 1 class

 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDClassifier
 
np.set_printoptions(linewidth=np.inf)
 
mnist = pd.read_csv("mnist_784.csv")
X = mnist.iloc[:, :-1].to_numpy()
y = mnist.iloc[:, -1].to_numpy().astype('str')
# If .astype('str') is removed, y holds numeric data. The y_train_5/y_test_5 comparisons
# below then compare against the string '5', so every element becomes False, the target has
# only the single False class, and the following error occurs:
# The number of classes has to be greater than one; got 1 class
# Alternatively, remove .astype('str') and compare against 5 instead of '5' in those comparisons.
 
first_digit = X[0]
# Select the first data point.
 
X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]
y_train_5 = (y_train == '5')
y_test_5 = (y_test == '5')
 
sgd_clf = SGDClassifier(random_state=42)
sgd_clf.fit(X_train, y_train_5)
print(sgd_clf.predict([first_digit]))
# Check whether the first data point is a 5.
 

 

Code that checks whether the first data point is a 5.

True is printed as the result.

 
