nabeelarain713 committed on
Commit
2ad8309
·
verified ·
1 Parent(s): ca9dd14
Files changed (1) hide show
  1. app.py +58 -139
app.py CHANGED
@@ -6,67 +6,76 @@ from langchain_core.messages import HumanMessage
6
  from langchain_google_genai import ChatGoogleGenerativeAI
7
  import os
8
  import streamlit as st
9
- from PIL import Image
10
- import io
11
- import base64
12
 
13
  # Set up environment variables and configurations
14
  genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
15
 
16
- # Set up MediaPipe
17
  mp_hands = mp.solutions.hands
18
  mp_drawing = mp.solutions.drawing_utils
19
  hands = mp_hands.Hands()
20
 
21
# Gesture codes returned by interpret_gesture() and compared in
# handle_gesture(); plain ints for cheap equality checks.
# Finger meanings follow the MediaPipe hand-landmark numbering used in
# interpret_gesture() (8=index tip, 12=middle tip, 4=thumb tip, 20=pinky tip).
GESTURE_DRAW = 1      # index finger raised, middle finger lowered
GESTURE_NAVIGATE = 2  # index and middle fingers both raised
GESTURE_RESET = 3     # thumb extended, all other fingers folded
GESTURE_SUBMIT = 4    # pinky finger raised
26
-
27
# Initialize session state shared by the gesture handlers.
if 'points' not in st.session_state:
    # List of strokes; each stroke is a list of (x, y) pixel tuples.
    st.session_state.points = []
if 'drawing' not in st.session_state:
    # True while a draw gesture is actively adding points.
    st.session_state.drawing = False
if 'new_stroke' not in st.session_state:
    # When True, the next draw point starts a fresh stroke.
    st.session_state.new_stroke = True
if 'image' not in st.session_state:
    # Captured webcam frame as a data URL.
    # NOTE(review): nothing in this file assigns this key — confirm how the
    # captured image is supposed to reach session state.
    st.session_state.image = None
36
-
37
def interpret_gesture(landmarks):
    """Map a MediaPipe hand-landmark list to one of the GESTURE_* codes.

    Compares fingertip y-coordinates against lower joints (smaller y is
    higher on screen). Returns GESTURE_DRAW, GESTURE_NAVIGATE,
    GESTURE_RESET, GESTURE_SUBMIT, or None when nothing matches.
    """
    index_raised = landmarks[8].y < landmarks[6].y
    middle_raised = landmarks[12].y < landmarks[10].y
    middle_lowered = landmarks[12].y > landmarks[10].y

    # Index up alone draws; index plus middle navigates.
    if index_raised and middle_lowered:
        return GESTURE_DRAW
    if index_raised and middle_raised:
        return GESTURE_NAVIGATE

    # Thumbs-up: thumb joints strictly ascending, every other finger folded
    # below the thumb's IP joint.
    thumb_extended = landmarks[4].y < landmarks[3].y < landmarks[2].y
    if thumb_extended and all(landmarks[i].y > landmarks[3].y for i in range(5, 21)):
        return GESTURE_RESET

    # Raised pinky submits the drawing.
    if landmarks[20].y < landmarks[18].y:
        return GESTURE_SUBMIT

    return None
49
 
50
def handle_gesture(gesture, hand_landmarks, frame):
    """Apply *gesture* to the shared drawing state in st.session_state.

    DRAW appends the index-fingertip pixel position to the current stroke,
    RESET clears all strokes, SUBMIT stops drawing and signals the caller.
    Returns True only for a submit gesture.
    """
    state = st.session_state

    if gesture == GESTURE_DRAW:
        # Convert the normalized fingertip coordinates to pixels.
        tip = hand_landmarks.landmark[8]
        point = (int(tip.x * frame.shape[1]), int(tip.y * frame.shape[0]))
        if state.new_stroke:
            state.points.append([])
            state.new_stroke = False
        state.points[-1].append(point)
        state.drawing = True
    else:
        # Any non-draw gesture ends the current stroke.
        state.drawing = False
        state.new_stroke = True

    if gesture == GESTURE_RESET:
        state.drawing = False
        state.points = []

    if gesture == GESTURE_SUBMIT:
        state.drawing = False
        return True
    return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  def send_to_gemini(drawing_canvas):
72
  llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest")
@@ -82,95 +91,5 @@ def send_to_gemini(drawing_canvas):
82
  response = llm.invoke([message]).content
83
  return response
84
 
85
def process_image(image_data):
    """Decode a base64 webcam frame, run gesture detection, and on a submit
    gesture render the accumulated strokes and send them to Gemini.

    Parameters
    ----------
    image_data : str
        A data URL of the form "data:image/png;base64,....".

    Returns
    -------
    str
        Gemini's answer when a submit gesture was detected, otherwise
        "No gesture detected".
    """
    # Decode the payload after the "data:...;base64," prefix into a PIL image.
    image_bytes = base64.b64decode(image_data.split(',')[1])
    image = Image.open(io.BytesIO(image_bytes))

    # Keep a copy of the raw capture on disk (debugging aid).
    image.save('captured_image.png')

    # MediaPipe expects RGB while OpenCV works in BGR; convert explicitly.
    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = hands.process(rgb_frame)

    if result.multi_hand_landmarks:
        submit = False
        for hand_landmarks in result.multi_hand_landmarks:
            gesture = interpret_gesture(hand_landmarks.landmark)
            # BUGFIX: accumulate the submit flag across hands — previously a
            # later hand without a submit gesture overwrote an earlier True.
            submit = handle_gesture(gesture, hand_landmarks, frame) or submit
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        if submit:
            # Rasterize the recorded strokes onto a black 800x600 canvas.
            drawing_canvas = np.zeros((600, 800, 3), dtype=np.uint8)
            for stroke in st.session_state.points:
                for i in range(1, len(stroke)):
                    cv2.line(drawing_canvas, stroke[i - 1], stroke[i], (255, 255, 255), 5)

            # Save the drawing canvas as an image and ask Gemini about it.
            cv2.imwrite('drawing.png', drawing_canvas)
            return send_to_gemini('drawing.png')

    # BUGFIX: previously fell off the end and returned None when hands were
    # detected but no submit gesture occurred; always return a message.
    return "No gesture detected"
121
-
122
def main():
    """Streamlit entry point: renders the title and injects webcam-capture JS.

    NOTE(review): the injected script POSTs the captured frame to '/', but a
    plain Streamlit app serves no such endpoint, and nothing in this file ever
    assigns st.session_state.image — so the processing branch at the bottom
    appears unreachable. Also, st.markdown does not execute <script> tags even
    with unsafe_allow_html=True (a components.html embed is usually needed) —
    confirm the intended capture flow.
    """
    st.title("Virtual Math Calculator")

    # HTML & JS for capturing image from the webcam
    st.markdown("""
    <script>
    async function getCameraFeed() {
        const video = document.createElement('video');
        video.style.display = 'none';
        document.body.appendChild(video);

        const stream = await navigator.mediaDevices.getUserMedia({ video: true });
        video.srcObject = stream;

        return new Promise((resolve) => {
            video.onloadedmetadata = () => {
                video.play();
                const canvas = document.createElement('canvas');
                canvas.width = video.videoWidth;
                canvas.height = video.videoHeight;
                const context = canvas.getContext('2d');
                context.drawImage(video, 0, 0, canvas.width, canvas.height);
                resolve(canvas.toDataURL('image/png'));
                stream.getTracks().forEach(track => track.stop());
                document.body.removeChild(video);
            };
        });
    }

    async function captureImage() {
        const image = await getCameraFeed();
        const response = await fetch('/', {
            method: 'POST',
            body: JSON.stringify({ image }),
            headers: {
                'Content-Type': 'application/json'
            }
        });
        const result = await response.json();
        document.getElementById('result').textContent = result.answer;
    }

    </script>

    <button onclick="captureImage()">Capture Image</button>
    <div id="result"></div>
    """, unsafe_allow_html=True)

    # Process captured image from session state
    if st.session_state.image:
        response = process_image(st.session_state.image)
        st.write(response)

# Script entry point.
if __name__ == "__main__":
    main()
 
6
  from langchain_google_genai import ChatGoogleGenerativeAI
7
  import os
8
  import streamlit as st
9
+ from PIL import Image, ImageDraw
 
 
10
 
11
# Set up environment variables and configurations.
# Requires GOOGLE_API_KEY in the environment; os.environ[...] raises KeyError
# at import time if it is missing.
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

# Set up MediaPipe (not used in the current implementation but kept for reference)
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
# Instantiated at import time with default Hands() parameters.
hands = mp_hands.Hands()
 
19
# Initialize session state for drawing: `drawing` tracks whether the user is
# mid-stroke; `points` holds the strokes, each a list of (x, y) tuples.
if 'drawing' not in st.session_state:
    st.session_state.drawing = False
if 'points' not in st.session_state:
    st.session_state.points = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
# Function to draw the recorded strokes onto a PIL canvas.
def draw_on_image(image, points, fill=(255, 255, 255), width=5):
    """Render *points* (a list of strokes) onto *image* in place.

    Parameters
    ----------
    image : PIL.Image.Image
        Canvas to draw on; modified in place via ImageDraw.
    points : list[list[tuple[int, int]]]
        Strokes; each stroke is a list of (x, y) pixel coordinates.
    fill : tuple[int, int, int], optional
        Stroke colour (default white). Generalized from a hard-coded value;
        default preserves the original behavior.
    width : int, optional
        Stroke width in pixels (default 5, as before).
    """
    draw = ImageDraw.Draw(image)
    for stroke in points:
        # A stroke needs at least two points to form a visible segment.
        if len(stroke) > 1:
            draw.line(stroke, fill=fill, width=width)
 
 
 
 
 
31
 
32
def main():
    """Streamlit entry point: drawing canvas on the left, instructions on the right.

    NOTE(review): the mouse-driven drawing flow below calls st.mouse(), which
    is not part of the Streamlit API and will raise AttributeError when this
    line executes; a component such as streamlit-drawable-canvas is probably
    what was intended. Flagged here rather than changed.
    """
    st.title("Virtual Math Calculator")

    col1, col2 = st.columns([3, 1])

    with col1:
        # Create a canvas for drawing
        canvas_placeholder = st.empty()
        result_placeholder = st.empty()
        # Lazily create the 800x600 black canvas once per session.
        if 'canvas_image' not in st.session_state:
            st.session_state.canvas_image = Image.new("RGB", (800, 600), (0, 0, 0))
        if st.button("Reset"):
            # Drop all strokes and start over from a fresh black canvas.
            st.session_state.points = []
            st.session_state.canvas_image = Image.new("RGB", (800, 600), (0, 0, 0))
        if st.button("Submit"):
            # Rasterize the strokes onto a copy, save it, and query Gemini.
            drawing_canvas = st.session_state.canvas_image.copy()
            draw_on_image(drawing_canvas, st.session_state.points)
            drawing_canvas.save('drawing.png')

            # Send the image to Gemini and get the response
            response = send_to_gemini('drawing.png')
            result_placeholder.text_area("Result:", value=response, height=300)

        # Show the canvas with a "Draw here" hint overlaid via OpenCV.
        canvas_image = np.array(st.session_state.canvas_image)
        cv2.putText(canvas_image, "Draw here", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        canvas_placeholder.image(canvas_image)

        # Handle mouse events for drawing
        def update_canvas(image):
            # Append the current cursor position to the active stroke and redraw.
            if st.session_state.drawing:
                current_pos = st.session_state.current_pos
                if st.session_state.new_stroke:
                    st.session_state.points.append([])
                    st.session_state.new_stroke = False
                st.session_state.points[-1].append(current_pos)
                draw_on_image(image, st.session_state.points)

        # NOTE(review): Streamlit buttons are momentary (True only on the rerun
        # in which they were clicked), so `drawing` cannot stay True across
        # reruns — combined with the nonexistent st.mouse above, this input
        # mechanism needs to be reworked; confirm the intended design.
        st.session_state.drawing = st.button("Start Drawing", key="start")
        st.session_state.current_pos = st.mouse("Mouse Position")
        st.session_state.new_stroke = st.button("New Stroke", key="new_stroke")
        update_canvas(st.session_state.canvas_image)

    with col2:
        st.header("Instructions")
        st.write("1. Use the left side to draw your equation.")
        st.write("2. Click **Submit** to process the drawing.")
        st.write("3. The result will be displayed below after submission.")
79
 
80
  def send_to_gemini(drawing_canvas):
81
  llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest")
 
91
  response = llm.invoke([message]).content
92
  return response
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
# Script entry point: run the Streamlit app.
if __name__ == "__main__":
    main()