Remote Mouse Control with Your Hand in Python
#python #machinelearning #softwareengineering #computervision

Normally we use a physical computer mouse, but it can also be controlled remotely.

In Python we use a few libraries for this, such as MediaPipe, PyAutoGUI and OpenCV. MediaPipe lets us track the hand landmarks that we use for the left-click, right-click and double-click actions.
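
MediaPipe reports 21 landmarks per detected hand, each with x and y coordinates normalized to the frame size (plus a relative depth z). As a quick orientation, here is a minimal sketch of reading them, assuming results comes from hands.process() as in the main loop further below:

# Sketch: print the wrist position of every detected hand.
# Assumes results = hands.process(rgb_img), as in the main loop below.
if results.multi_hand_landmarks:
    for hand in results.multi_hand_landmarks:
        wrist = hand.landmark[0]  # landmark index 0 is the wrist
        print(wrist.x, wrist.y)   # both normalized to [0, 1]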

PyAutoGUI generates the mouse events for the move and click actions. The project could still be optimized quite a bit, though.
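
If you want to run it yourself, the dependencies can typically be installed from PyPI like this (pywin32 provides win32api, so this version is Windows-only):

pip install mediapipe opencv-python pyautogui psutil pywin32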

from concurrent.futures import ThreadPoolExecutor
from math import sqrt
import cv2
import mediapipe as mp
import psutil
from entities.mouse import Mouse
from win32api import GetSystemMetrics

grabSize = 50  # pixels trimmed from each edge of the captured frame

pool = ThreadPoolExecutor(max_workers=psutil.cpu_count())  # one worker per CPU core
mp_hands = mp.solutions.hands

hands = mp_hands.Hands(static_image_mode=False,
                       max_num_hands=1,
                       min_detection_confidence=0.5,
                       min_tracking_confidence=0.5
                       )

mp_draw = mp.solutions.drawing_utils
mouse = Mouse(GetSystemMetrics(0), GetSystemMetrics(1))  # screen width, height


def resolution(cap, w, h):
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, w)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, h)


def isExist(value):
    # MediaPipe returns None when no hand is detected.
    return bool(value)


def getDistance(a, b, hands_landmarks, scale=False, w=0, h=0):
    # Euclidean distance between landmarks a and b, either in normalized
    # [0, 1] coordinates or scaled to pixels via the frame size (w, h).
    dx = hands_landmarks[0].landmark[a].x - hands_landmarks[0].landmark[b].x
    dy = hands_landmarks[0].landmark[a].y - hands_landmarks[0].landmark[b].y
    if not scale:
        return sqrt(dx ** 2 + dy ** 2)
    return sqrt((dx * w) ** 2 + (dy * h) ** 2)


def draw(frame, hands_landmarks):
    h, w, c = frame.shape

    # Pixel distances across the palm, used as crop margins below.
    distance_three_to_seventeen = round(getDistance(3, 17, hands_landmarks, True, w, h))
    distance_zero_to_ten = round(getDistance(0, 10, hands_landmarks, True, w, h))
    # Normalized thumb-to-fingertip distances for the click gestures.
    distance_left = getDistance(4, 8, hands_landmarks)  # thumb to index tip
    distance_double_click = getDistance(4, 12, hands_landmarks)  # thumb to middle tip
    distance_point = getDistance(9, 0, hands_landmarks)  # palm size reference
    distance_right = getDistance(16, 4, hands_landmarks)  # ring tip to thumb
    distance_width = getDistance(4, 20, hands_landmarks)  # hand width
    distance_height = getDistance(0, 12, hands_landmarks)  # hand height

    # Shrink the working area by the palm-size margins so smaller hand
    # motions can still reach every screen position.
    wrapped_frame = frame[int(distance_zero_to_ten):int(h - distance_zero_to_ten),
                    int(distance_three_to_seventeen):int(w - distance_three_to_seventeen)]

    wh, ww, wc = wrapped_frame.shape
    # Palm center: midpoint of the wrist (0) and middle-finger base (9).
    wx = w * (hands_landmarks[0].landmark[0].x + hands_landmarks[0].landmark[9].x) / 2 - distance_three_to_seventeen
    wy = h * (hands_landmarks[0].landmark[0].y + hands_landmarks[0].landmark[9].y) / 2 - distance_zero_to_ten

    # Clamp to the crop area.
    if wx < 0:
        wx = 1
    if wy < 0:
        wy = 1

    try:
        # Scale the palm center from crop coordinates to screen coordinates.
        x = round(wx * (mouse._swidth / ww))
        y = round(wy * (mouse._sheight / wh))
    except ZeroDivisionError:
        return

    # Mirror x (the webcam image is flipped) and nudge targets near the
    # far edges so they stay reachable.
    mouse.move(mouse._swidth - x + (0 if (mouse._swidth - x) < mouse._swidth / 2 else distance_width), y +
               (0 if (mouse._sheight - y) < mouse._sheight / 2 else distance_height))

    # A pinch counts as a click when the fingertip distance drops below
    # one fifth of the palm size.
    if distance_left < distance_point / 5:
        mouse.click()
    elif distance_right < distance_point / 5:
        mouse.click(button="right")
    elif distance_double_click < distance_point / 5:
        mouse.click(button="double")


def start(cam_code=0):
    cap = cv2.VideoCapture(cam_code)

    while True:
        ret, frame = cap.read()
        if not ret:
            print("failed to grab frame")
            break
        # Light blur against sensor noise, then trim the frame edges.
        frame = cv2.GaussianBlur(frame, (3, 3), 0)
        grappedImage = frame[grabSize:frame.shape[0] - grabSize, grabSize:frame.shape[1] - grabSize]

        # MediaPipe expects RGB while OpenCV delivers BGR.
        rgb_img = cv2.cvtColor(grappedImage, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_img)
        hands_landmarks = results.multi_hand_landmarks

        if isExist(hands_landmarks):
            # Show a crop around the hand: rows from the middle fingertip
            # (12) down to the wrist (0), columns from the pinky tip (20)
            # to the thumb tip (4).
            lm = hands_landmarks[0].landmark
            gh, gw = grappedImage.shape[:2]
            try:
                cv2.imshow("hand", grappedImage[int(gh * lm[12].y):int(gh * lm[0].y),
                                                int(gw * lm[20].x):int(gw * lm[4].x)])
            except Exception:
                pass
            pool.submit(draw, grappedImage, hands_landmarks)

        cv2.imshow("Hand tracer", grappedImage)
        if cv2.waitKey(1) == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

cap.py is used to grab the frames. A frame holds the pixel color values (converted from BGR to RGB for MediaPipe), and hands_landmarks holds our hand and its landmark points. A thread pool is used here to keep frame processing fast, but it may not be a good solution, so we can improve it.

pool = ThreadPoolExecutor(max_workers=psutil.cpu_count())

The pool size is set to the number of CPU cores reported by psutil.
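
Because every submitted draw call ends up moving the real cursor, parallel workers can compete for it. A minimal alternative worth trying (my suggestion, not what the project does) is a single worker that serializes the updates:

# Alternative sketch: one worker, so mouse updates from consecutive
# frames cannot race each other.
pool = ThreadPoolExecutor(max_workers=1)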

The Euclidean distance is very important here, because I use it to compute the distances between hand landmarks, and those distances drive the mouse actions.
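
For two landmarks in normalized coordinates this is just the classic formula d = sqrt(dx^2 + dy^2). A standalone sketch with made-up coordinates:

from math import sqrt

# Hypothetical normalized landmark coordinates (not from a real frame)
thumb_tip = (0.42, 0.55)
index_tip = (0.45, 0.51)

distance = sqrt((thumb_tip[0] - index_tip[0]) ** 2 +
                (thumb_tip[1] - index_tip[1]) ** 2)
print(distance)  # 0.05

The decision code below compares these fingertip distances against a fraction of the palm size.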

if distance_left < distance_point / 5:
    mouse.click()
elif distance_right < distance_point / 5:
    mouse.click(button="right")
elif distance_double_click < distance_point / 5:
    mouse.click(button="double")

This is the technique I chose for making the click decisions. The mouse position on screen is obtained by scaling the tracked hand point from the cropped frame to our computer's screen resolution.
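
The scaling itself is a simple proportion: a palm center at (wx, wy) inside a ww x wh crop maps to the screen as in this sketch (the 1920x1080 resolution is an assumption for the example):

# Hypothetical values: palm center at pixel (320, 180) in a 640x360 crop
wx, wy = 320, 180
ww, wh = 640, 360
screen_w, screen_h = 1920, 1080  # assumed screen resolution

x = round(wx * (screen_w / ww))  # 960
y = round(wy * (screen_h / wh))  # 540

The Mouse class below then checks the bounds and performs the actual move and clicks.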

import pyautogui
import threading
import time


class Mouse():
    def __init__(self, screen_width, screen_height):
        self._swidth = screen_width
        self._sheight = screen_height

    def move(self, x, y):
        # Only move while the target stays inside the screen bounds.
        if y > 0 and x > 0 and y < self._sheight and x < self._swidth:
            pyautogui.moveTo((x, y))

    def start_click(self, button):
        # Click at the current cursor position if it is on the screen.
        if self._swidth > pyautogui.position()[0] and pyautogui.position()[1] < self._sheight:
            if button == "left":
                pyautogui.click(pyautogui.position()[0], pyautogui.position()[1])
            elif button == "right":
                pyautogui.click(pyautogui.position()[0], pyautogui.position()[1], button="right")
            else:
                pyautogui.doubleClick(pyautogui.position()[0], pyautogui.position()[1])

    def click(self, button="left"):
        # Small delay, then fire the click on a background thread so the
        # tracking loop is not blocked.
        time.sleep(0.15)
        thread = threading.Thread(target=self.start_click, args=(button,), daemon=False)
        thread.start()
    def move(self, x, y):
        if y > 0 and x > 0 and y < self._sheight and x < self._swidth:
            pyautogui.moveTo((x, y))

move() takes the position computed from the frame and moves the cursor there. The other actions, such as the left and right buttons, can be seen above as well.
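
A quick usage sketch of the class (the 1920x1080 screen size is just an example):

mouse = Mouse(1920, 1080)
mouse.move(960, 540)          # move to the screen center
mouse.click()                 # left click
mouse.click(button="right")   # right click
mouse.click(button="double")  # double click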

import cv2

def cam_control(code):
    cap = cv2.VideoCapture(code)
    res, frame = cap.read()
    cv2.imshow("test", frame)
    cv2.waitKey(0)  # keep the window open; without this it closes immediately
    cv2.destroyAllWindows()
    cap.release()

You can use this to check whether your camera is working.
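
If index 0 does not open, you can probe a few indices to find a working camera. A small hypothetical helper in the same spirit:

import cv2

def find_camera(max_index=5):
    # Try indices 0..max_index-1 and return the first camera that
    # delivers a frame, or None if none does.
    for code in range(max_index):
        cap = cv2.VideoCapture(code)
        ok, _ = cap.read()
        cap.release()
        if ok:
            return code
    return None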

from core.cap import start

if __name__ == '__main__':
    start(0)

Finally, we can start the application. The application can still be improved, because there are some problems, such as unintended clicks or positions that are hard to click.
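
One improvement I would try for the shaky cursor (not part of this project) is smoothing the target position with an exponential moving average before calling mouse.move:

class Smoother:
    # Hypothetical helper: exponential moving average of (x, y) targets,
    # trading a little latency for a steadier cursor.
    def __init__(self, alpha=0.3):
        self.alpha = alpha
        self.x = None
        self.y = None

    def update(self, x, y):
        if self.x is None:
            self.x, self.y = float(x), float(y)
        else:
            self.x = self.alpha * x + (1 - self.alpha) * self.x
            self.y = self.alpha * y + (1 - self.alpha) * self.y
        return round(self.x), round(self.y)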

Python Computer Vision - Remote Mouse Control
I have shown you that the mouse can be used with our hand. There is also a different option: using the eyes instead (eye tracking).

Sample video:
https://www.youtube.com/shorts/8_0IBCoxzPg

https://github.com/gokhanergen-tech/Remote-Computer-Control-With-Hand