nabeelarain713 committed on
Commit
2ccc12d
·
verified ·
1 Parent(s): a4f17a5
Files changed (1) hide show
  1. app.py +135 -0
app.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import mediapipe as mp
3
+ import numpy as np
4
+ import google.generativeai as genai
5
+ from langchain_core.messages import HumanMessage
6
+ from langchain_google_genai import ChatGoogleGenerativeAI
7
+ import os
8
+ import streamlit as st
9
+ from PIL import Image
10
+
11
# Set up environment variables and configurations.
# The Gemini API key is looked up *by name* in Streamlit's secrets store
# (.streamlit/secrets.toml locally, or the hosting platform's secret settings).
# A literal key must never be written in source: it is not a valid secret name
# and it leaks the credential to anyone who can read the repository.
# NOTE(review): any key that was ever committed here should be revoked and
# replaced, since it remains visible in the repository history.
os.environ['GOOGLE_API_KEY'] = st.secrets["GOOGLE_API_KEY"]
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

# Set up MediaPipe: one shared hand-tracking instance plus the drawing helpers
# used to overlay the detected landmarks on each frame.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands()

# Gesture identifiers returned by interpret_gesture() and consumed by
# handle_gesture().
GESTURE_DRAW = 1
GESTURE_NAVIGATE = 2
GESTURE_RESET = 3
GESTURE_SUBMIT = 4

# Initialize session state (only on first run; Streamlit keeps these values
# across script reruns).
# points: list of strokes, each stroke being a list of (x, y) pixel tuples.
if 'points' not in st.session_state:
    st.session_state.points = []
# drawing: True while the draw gesture is currently active.
if 'drawing' not in st.session_state:
    st.session_state.drawing = False
# new_stroke: True when the next drawn point must start a fresh stroke.
if 'new_stroke' not in st.session_state:
    st.session_state.new_stroke = True
33
+
34
def interpret_gesture(landmarks):
    """Classify a hand pose into one of the gesture constants.

    Only the ``y`` attribute of the indexed landmarks is compared. The checks
    run in priority order and the first match wins:

    1. landmark 8 above landmark 6 and landmark 12 below landmark 10 -> draw
    2. landmark 8 above landmark 6 and landmark 12 above landmark 10 -> navigate
    3. landmarks 4, 3, 2 strictly ascending in y and every landmark 5..20
       below landmark 3 -> reset
    4. landmark 20 above landmark 18 -> submit

    "Above" means a smaller ``y`` value. In MediaPipe's hand model these
    indices are presumably the index (8/6), middle (12/10), thumb (4/3/2) and
    pinky (20/18) joints -- confirm against the MediaPipe landmark chart.

    Args:
        landmarks: Indexable sequence of at least 21 objects exposing ``.y``.

    Returns:
        One of the ``GESTURE_*`` constants, or ``None`` if nothing matched.
    """
    index_raised = landmarks[8].y < landmarks[6].y
    if index_raised:
        middle_tip_y = landmarks[12].y
        middle_pip_y = landmarks[10].y
        if middle_tip_y > middle_pip_y:
            return GESTURE_DRAW
        if middle_tip_y < middle_pip_y:
            return GESTURE_NAVIGATE
        # Equal values match neither pose; fall through to the later checks.

    thumb_ip_y = landmarks[3].y
    thumb_raised = landmarks[4].y < thumb_ip_y and thumb_ip_y < landmarks[2].y
    if thumb_raised:
        fingers_folded = True
        for idx in range(5, 21):
            if not landmarks[idx].y > thumb_ip_y:
                fingers_folded = False
                break
        if fingers_folded:
            return GESTURE_RESET

    pinky_raised = landmarks[20].y < landmarks[18].y
    return GESTURE_SUBMIT if pinky_raised else None
46
+
47
def handle_gesture(gesture, hand_landmarks, frame):
    """Apply a recognised gesture to the drawing state in ``st.session_state``.

    * Draw: converts landmark 8's normalised position to frame pixels and
      appends it to the current stroke, opening a new stroke first when
      ``new_stroke`` is set.
    * Any other gesture (or ``None``): ends the current stroke so the next
      draw starts a new one.
    * Reset: additionally discards every recorded stroke.
    * Submit: reports that the drawing should be sent for evaluation.

    Args:
        gesture: A ``GESTURE_*`` constant or ``None``.
        hand_landmarks: Object whose ``.landmark[8]`` exposes ``.x`` / ``.y``
            (scaled here by the frame width / height).
        frame: Image array; ``shape[0]`` is used as height and ``shape[1]``
            as width.

    Returns:
        ``True`` only for the submit gesture, otherwise ``False``.
    """
    state = st.session_state

    if gesture != GESTURE_DRAW:
        # Leaving draw mode closes the stroke in progress.
        state.drawing = False
        state.new_stroke = True
    else:
        fingertip = hand_landmarks.landmark[8]
        frame_height, frame_width = frame.shape[0], frame.shape[1]
        pixel = (int(fingertip.x * frame_width), int(fingertip.y * frame_height))
        if state.new_stroke:
            state.points.append([])
            state.new_stroke = False
        state.points[-1].append(pixel)
        state.drawing = True

    if gesture == GESTURE_RESET:
        state.drawing = False
        state.points = []

    if gesture != GESTURE_SUBMIT:
        return False
    state.drawing = False
    return True
67
+
68
def send_to_gemini(drawing_canvas):
    """Ask the Gemini model to solve the math shown in an image.

    Builds a single human message holding a fixed text instruction plus the
    image reference, invokes the ``gemini-1.5-flash-latest`` chat model with
    it, and returns the ``content`` of the model's reply.

    Args:
        drawing_canvas: Value placed in the message's ``image_url`` field
            (the caller in this file passes a PNG file path).

    Returns:
        The ``content`` attribute of the model response.
    """
    model = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest")

    instruction_part = {
        "type": "text",
        "text": "Give me the answer of any mathematical representation in the image with the complete solution, and does not say the image contains etc.",
    }
    image_part = {"type": "image_url", "image_url": drawing_canvas}
    request = HumanMessage(content=[instruction_part, image_part])

    reply = model.invoke([request])
    return reply.content
81
+
82
def _draw_strokes(image, strokes, color, thickness=5):
    """Draw each stroke onto ``image`` as a chain of connected line segments."""
    for stroke in strokes:
        for start, end in zip(stroke, stroke[1:]):
            cv2.line(image, start, end, color, thickness)


def main():
    """Run the webcam loop: track the hand, draw strokes, submit to Gemini.

    Each frame is mirrored, passed through MediaPipe hand tracking, and every
    detected hand's gesture is applied to the drawing state. When a submit
    gesture begins, the recorded strokes are rendered white-on-black to
    ``drawing.png``, sent to Gemini, and the answer is shown under the video.
    The loop ends when a frame cannot be read (or ``q`` is reported by
    ``cv2.waitKey``); the camera is always released on exit.
    """
    st.title("Virtual Math Calculator")

    # Set up the webcam
    cap = cv2.VideoCapture(0)

    # Create placeholders for the webcam feed and the result
    webcam_placeholder = st.empty()
    result_placeholder = st.empty()

    was_submitting = False  # submit gesture state on the previous frame
    submission_count = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                st.error("Failed to capture frame from camera")
                break

            frame = cv2.flip(frame, 1)
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            result = hands.process(rgb_frame)

            # Reset every frame: otherwise the flag is undefined until a hand
            # is first seen and stays stale once the hand leaves the view.
            submit = False
            if result.multi_hand_landmarks:
                for hand_landmarks in result.multi_hand_landmarks:
                    gesture = interpret_gesture(hand_landmarks.landmark)
                    # Any hand may trigger submit; a later hand must not
                    # overwrite an earlier hand's request.
                    if handle_gesture(gesture, hand_landmarks, frame):
                        submit = True
                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Fire once when the gesture starts, not on every frame it is
            # held, so a single submit does not issue a burst of API calls.
            if submit and not was_submitting:
                # Stroke points are in frame pixel coordinates, so the canvas
                # must match the frame size or large frames get clipped.
                drawing_canvas = np.zeros_like(frame)
                _draw_strokes(drawing_canvas, st.session_state.points, (255, 255, 255))

                # Save the drawing canvas as an image
                cv2.imwrite('drawing.png', drawing_canvas)

                # Send the image to Gemini and get the response
                response = send_to_gemini('drawing.png')

                # Display the response; a distinct key per submission keeps
                # repeated renders within one script run from colliding.
                submission_count += 1
                result_placeholder.text_area(
                    "Result:",
                    value=response,
                    height=300,
                    key=f"result_{submission_count}",
                )
            was_submitting = submit

            _draw_strokes(frame, st.session_state.points, (0, 255, 0))

            # ``frame`` is still in OpenCV's BGR order (only ``rgb_frame`` was
            # converted), so tell Streamlit to interpret it as BGR.
            webcam_placeholder.image(frame, channels="BGR", use_column_width=True)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera even if processing raised.
        cap.release()


if __name__ == "__main__":
    main()