nabeelarain713 committed on
Commit
ca9dd14
·
verified ·
1 Parent(s): 5f2f7cf
Files changed (1) hide show
  1. app.py +90 -49
app.py CHANGED
@@ -7,9 +7,10 @@ from langchain_google_genai import ChatGoogleGenerativeAI
7
  import os
8
  import streamlit as st
9
  from PIL import Image
 
 
10
 
11
  # Set up environment variables and configurations
12
- # SECURITY: a literal Google API key was committed on this line; it has been redacted and must be revoked/rotated. Load it via st.secrets["GOOGLE_API_KEY"] instead of hard-coding it.
13
  genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
14
 
15
  # Set up MediaPipe
@@ -30,6 +31,8 @@ if 'drawing' not in st.session_state:
30
  st.session_state.drawing = False
31
  if 'new_stroke' not in st.session_state:
32
  st.session_state.new_stroke = True
 
 
33
 
34
  def interpret_gesture(landmarks):
35
  if landmarks[8].y < landmarks[6].y and landmarks[12].y > landmarks[10].y:
@@ -79,57 +82,95 @@ def send_to_gemini(drawing_canvas):
79
  response = llm.invoke([message]).content
80
  return response
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
def main():
    """Run the virtual math calculator.

    Captures webcam frames, tracks hand landmarks with MediaPipe, lets the
    user draw strokes via gestures, and on a submit gesture rasterizes the
    strokes, saves them to ``drawing.png``, and asks Gemini to solve the
    drawn expression. Results are shown in a Streamlit text area.
    """
    st.title("Virtual Math Calculator")

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        # Fail fast with a visible message instead of looping on dead reads.
        st.error("Could not open the webcam")
        return

    # Placeholders so the feed and result update in place on each frame.
    webcam_placeholder = st.empty()
    result_placeholder = st.empty()

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                st.error("Failed to capture frame from camera")
                break

            # Mirror the frame so on-screen motion matches the user's hand.
            frame = cv2.flip(frame, 1)
            # MediaPipe expects RGB; OpenCV delivers BGR.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            result = hands.process(rgb_frame)

            if result.multi_hand_landmarks:
                for hand_landmarks in result.multi_hand_landmarks:
                    gesture = interpret_gesture(hand_landmarks.landmark)
                    submit = handle_gesture(gesture, hand_landmarks, frame)
                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    if submit:
                        # Rasterize the accumulated strokes onto a blank canvas.
                        drawing_canvas = np.zeros((600, 800, 3), dtype=np.uint8)
                        for stroke in st.session_state.points:
                            for i in range(1, len(stroke)):
                                cv2.line(drawing_canvas, stroke[i - 1], stroke[i], (255, 255, 255), 5)

                        # Persist the drawing and hand it to Gemini.
                        cv2.imwrite('drawing.png', drawing_canvas)
                        response = send_to_gemini('drawing.png')
                        result_placeholder.text_area("Result:", value=response, height=300)

            # Overlay the in-progress strokes on the live feed.
            for stroke in st.session_state.points:
                for i in range(1, len(stroke)):
                    cv2.line(frame, stroke[i - 1], stroke[i], (0, 255, 0), 5)

            # BUG FIX: `frame` is BGR (OpenCV); declaring channels="RGB" swapped
            # red and blue in the displayed feed.
            webcam_placeholder.image(frame, channels="BGR", use_column_width=True)
            # NOTE: the old `cv2.waitKey(1)` quit-check was removed — with no
            # cv2.imshow window it always returned -1 and could never fire.
    finally:
        # Always release the camera, even if Streamlit interrupts the loop.
        cap.release()


if __name__ == "__main__":
    main()
 
7
  import os
8
  import streamlit as st
9
  from PIL import Image
10
+ import io
11
+ import base64
12
 
13
  # Set up environment variables and configurations
 
14
  genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
15
 
16
  # Set up MediaPipe
 
31
  st.session_state.drawing = False
32
  if 'new_stroke' not in st.session_state:
33
  st.session_state.new_stroke = True
34
+ if 'image' not in st.session_state:
35
+ st.session_state.image = None
36
 
37
  def interpret_gesture(landmarks):
38
  if landmarks[8].y < landmarks[6].y and landmarks[12].y > landmarks[10].y:
 
82
  response = llm.invoke([message]).content
83
  return response
84
 
85
def process_image(image_data):
    """Decode a base64 webcam capture, run gesture detection, and — when a
    submit gesture is detected — send the current drawing to Gemini.

    Args:
        image_data: str, either a full data URL ("data:image/png;base64,...")
            or a bare base64 payload.

    Returns:
        The Gemini response text when a submit gesture was detected,
        otherwise the string "No gesture detected".
    """
    # BUG FIX: the old `image_data.split(',')[1]` raised IndexError on a bare
    # base64 string with no data-URL prefix. partition() handles both forms.
    _, _, payload = image_data.partition(',')
    image_bytes = base64.b64decode(payload or image_data)
    image = Image.open(io.BytesIO(image_bytes))

    # Keep a copy on disk for inspection/debugging.
    image.save('captured_image.png')

    # PIL gives RGB; the OpenCV drawing utilities below expect BGR.
    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

    # MediaPipe expects RGB input.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = hands.process(rgb_frame)

    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            gesture = interpret_gesture(hand_landmarks.landmark)
            submit = handle_gesture(gesture, hand_landmarks, frame)
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            if submit:
                # Rasterize the accumulated strokes onto a blank canvas.
                drawing_canvas = np.zeros((600, 800, 3), dtype=np.uint8)
                for stroke in st.session_state.points:
                    for i in range(1, len(stroke)):
                        cv2.line(drawing_canvas, stroke[i - 1], stroke[i], (255, 255, 255), 5)

                # Persist the drawing and hand it to Gemini.
                cv2.imwrite('drawing.png', drawing_canvas)
                return send_to_gemini('drawing.png')

    return "No gesture detected"
+
122
def main():
    """Streamlit entry point: capture a webcam photo and run it through
    process_image(), displaying the model's answer."""
    st.title("Virtual Math Calculator")

    # BUG FIX: the previous version injected a <script> block via st.markdown
    # with unsafe_allow_html=True — Streamlit never executes injected scripts,
    # and the JS fetch() POST to '/' had no server-side handler, so
    # st.session_state.image could never be populated. The built-in
    # st.camera_input widget actually returns the capture to Python.
    photo = st.camera_input("Capture Image")
    if photo is not None:
        # Re-encode as a data URL so process_image's contract is unchanged.
        encoded = base64.b64encode(photo.getvalue()).decode('ascii')
        st.session_state.image = f"data:image/png;base64,{encoded}"

    # Process the captured image held in session state, if any.
    if st.session_state.image:
        response = process_image(st.session_state.image)
        st.write(response)


if __name__ == "__main__":
    main()