# from https://huggingface.co/CrucibleAI/ControlNetMediaPipeFace/blob/main/laion_face_common.py
# and https://github.com/ForserX/StableDiffusionUI/blob/main/data/repo/diffusion_scripts/modules/controlnet/laion_face_common.py
|
|
|
|
from typing import Mapping
|
|
|
|
import mediapipe as mp
|
|
import numpy
|
|
|
|
# Short aliases for the mediapipe solution modules referenced in this file.
mp_face_mesh = mp.solutions.face_mesh
mp_face_detection = mp.solutions.face_detection  # Only for counting faces.
mp_drawing_styles = mp.solutions.drawing_styles
mp_drawing = mp.solutions.drawing_utils

# Landmark connection sets for the face, hand and body solutions.
mp_face_connections = mp.solutions.face_mesh_connections.FACEMESH_TESSELATION
mp_hand_connections = mp.solutions.hands_connections.HAND_CONNECTIONS
mp_body_connections = mp.solutions.pose_connections.POSE_CONNECTIONS

# Names looked up on the drawing_styles module so they can be used unqualified.
DrawingSpec = mp_drawing_styles.DrawingSpec
PoseLandmark = mp_drawing_styles.PoseLandmark
|
|
|
|
# Faces whose smaller bounding-box side is below this many pixels are skipped by
# generate_annotation; a value <= 0 disables the filter.
min_face_size_pixels: int = 64

# Stroke thickness and circle radius shared by every facial-feature style below.
f_thick = 2
f_rad = 1


def _feature_spec(color):
    """Build a DrawingSpec with the shared thickness/radius and the given color."""
    return DrawingSpec(color=color, thickness=f_thick, circle_radius=f_rad)


# One style per facial feature. generate_annotation reverses the channel order
# of the finished canvas, so these tuples appear reversed in its output.
right_iris_draw = _feature_spec((10, 200, 250))
right_eye_draw = _feature_spec((10, 200, 180))
right_eyebrow_draw = _feature_spec((10, 220, 180))
left_iris_draw = _feature_spec((250, 200, 10))
left_eye_draw = _feature_spec((180, 200, 10))
left_eyebrow_draw = _feature_spec((180, 220, 10))
mouth_draw = _feature_spec((10, 180, 10))
head_draw = _feature_spec((10, 200, 10))
|
|
|
|
# mp_face_mesh.FACEMESH_CONTOURS has all the items we care about.
# Maps each face-mesh edge to the style used to draw it. The iris edge sets
# (FACEMESH_LEFT_IRIS / FACEMESH_RIGHT_IRIS) are not added here; the irises are
# marked as points through iris_landmark_spec instead.
face_connection_spec = {}
for _edge_group, _group_spec in (
    (mp_face_mesh.FACEMESH_FACE_OVAL, head_draw),
    (mp_face_mesh.FACEMESH_LEFT_EYE, left_eye_draw),
    (mp_face_mesh.FACEMESH_LEFT_EYEBROW, left_eyebrow_draw),
    (mp_face_mesh.FACEMESH_RIGHT_EYE, right_eye_draw),
    (mp_face_mesh.FACEMESH_RIGHT_EYEBROW, right_eyebrow_draw),
    (mp_face_mesh.FACEMESH_LIPS, mouth_draw),
):
    for edge in _edge_group:
        face_connection_spec[edge] = _group_spec
del _edge_group, _group_spec

# Landmark index -> style for the two iris points drawn by draw_pupils.
# NOTE(review): 468 / 473 are presumably the iris-centre landmarks that only
# exist when FaceMesh runs with refine_landmarks=True - confirm against mediapipe.
iris_landmark_spec = {468: right_iris_draw, 473: left_iris_draw}
|
|
|
|
|
|
def draw_pupils(image, landmark_list, drawing_spec, halfwidth: int = 2):
    """Paint a small filled square onto ``image`` for each selected landmark.

    We have a custom function to draw the pupils because the mp.draw_landmarks method requires a parameter for all
    landmarks. Until our PR is merged into mediapipe, we need this separate method.

    Args:
        image: H x W x 3 array; it is modified in place.
        landmark_list: Object whose ``landmark`` attribute is a sequence of landmarks. Each landmark
            exposes ``x`` / ``y`` (fractions of the image width / height), ``visibility``, ``presence``
            and ``HasField``.
        drawing_spec: Either a mapping from landmark index to an object with a ``color`` attribute
            (landmarks without an entry are skipped) or a single ``DrawingSpec`` used for every landmark.
        halfwidth: Half the side length, in pixels, of the painted square.

    Raises:
        ValueError: If ``image`` is not three-dimensional or does not have exactly three channels.
    """
    if len(image.shape) != 3:
        raise ValueError("Input image must be H,W,C.")
    image_rows, image_cols, image_channels = image.shape
    if image_channels != 3:  # BGR channels
        raise ValueError("Input image must contain three channel bgr data.")

    for idx, landmark in enumerate(landmark_list.landmark):
        # Skip landmarks that are reported as poorly visible or likely absent.
        if (landmark.HasField("visibility") and landmark.visibility < 0.9) or (
            landmark.HasField("presence") and landmark.presence < 0.5
        ):
            continue
        # Skip landmarks that fall outside the image.
        if landmark.x >= 1.0 or landmark.x < 0 or landmark.y >= 1.0 or landmark.y < 0:
            continue

        image_x = int(image_cols * landmark.x)
        image_y = int(image_rows * landmark.y)

        draw_color = None
        if isinstance(drawing_spec, Mapping):
            spec = drawing_spec.get(idx)
            if spec is None:
                continue
            draw_color = spec.color
        elif isinstance(drawing_spec, DrawingSpec):
            draw_color = drawing_spec.color

        # Clamp the slice bounds at 0: a negative bound would be interpreted by
        # numpy as counting from the far edge, so a landmark within `halfwidth`
        # pixels of the top/left border would select the wrong (usually empty)
        # region. Bounds past the bottom/right edge are truncated by numpy itself.
        top = max(image_y - halfwidth, 0)
        bottom = max(image_y + halfwidth, 0)
        left = max(image_x - halfwidth, 0)
        right = max(image_x + halfwidth, 0)
        image[top:bottom, left:right, :] = draw_color
|
|
|
|
|
|
def reverse_channels(image):
    """Flip the order of the third axis of a numpy array: RGB becomes BGR and BGR becomes RGB."""
    # A step of -1 walks the channel axis backwards, which yields a view onto the
    # same data. Indexing with an explicit list such as im[:, :, [2, 1, 0]] gives
    # the same values but copies the data.
    backwards = slice(None, None, -1)
    return image[:, :, backwards]
|
|
|
|
|
|
def generate_annotation(img_rgb, max_faces: int, min_confidence: float):
    """
    Detect up to 'max_faces' face meshes in 'img_rgb' and draw them on a black canvas.

    When the module-level 'min_face_size_pixels' is greater than zero, faces whose bounding box has a
    smaller side below that many pixels are left out. The returned array has the same shape as
    'img_rgb'; it is all zeros when no face is detected.
    """
    with mp_face_mesh.FaceMesh(
        static_image_mode=True,
        max_num_faces=max_faces,
        refine_landmarks=True,
        min_detection_confidence=min_confidence,
    ) as facemesh:
        img_height, img_width, img_channels = img_rgb.shape
        assert img_channels == 3

        detections = facemesh.process(img_rgb).multi_face_landmarks
        if detections is None:
            print("No faces detected in controlnet image for Mediapipe face annotator.")
            return numpy.zeros_like(img_rgb)

        # Keep only the faces that are large enough.
        kept_faces = []
        for face in detections:
            points = face.landmark

            # Bounding box of all landmarks, in the landmarks' own (fractional) coordinates.
            left = right = points[0].x
            top = bottom = points[0].y
            for point in points:
                left = min(left, point.x)
                top = min(top, point.y)
                right = max(right, point.x)
                bottom = max(bottom, point.y)

            if not min_face_size_pixels > 0:
                # Size filter disabled: every detected face is kept.
                kept_faces.append(face)
                continue

            width_px = abs(right - left) * img_width
            height_px = abs(bottom - top) * img_height
            if min(width_px, height_px) >= min_face_size_pixels:
                kept_faces.append(face)

        # Annotations are drawn in BGR, but an all-zero canvas looks the same in either channel order.
        canvas = numpy.zeros_like(img_rgb)

        for face in kept_faces:
            # Contour edges first, then the iris points on top.
            mp_drawing.draw_landmarks(
                canvas,
                face,
                connections=face_connection_spec.keys(),
                landmark_drawing_spec=None,
                connection_drawing_spec=face_connection_spec,
            )
            draw_pupils(canvas, face, iris_landmark_spec, 2)

        # Flip BGR back to RGB; copy() turns the reversed view into an independent array.
        return reverse_channels(canvas).copy()
|