nabeelarain713 committed on
Commit
ca9dd14
·
verified ·
1 Parent(s): 5f2f7cf
Files changed (1) hide show
  1. app.py +90 -49
app.py CHANGED
@@ -7,9 +7,10 @@ from langchain_google_genai import ChatGoogleGenerativeAI
7
  import os
8
  import streamlit as st
9
  from PIL import Image
 
 
10
 
11
  # Set up environment variables and configurations
12
- # SECURITY: a literal Google API key was committed on this line; it has been redacted and must be revoked/rotated. Load it via st.secrets["GOOGLE_API_KEY"] instead of hard-coding it.
13
  genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
14
 
15
  # Set up MediaPipe
@@ -30,6 +31,8 @@ if 'drawing' not in st.session_state:
30
  st.session_state.drawing = False
31
  if 'new_stroke' not in st.session_state:
32
  st.session_state.new_stroke = True
 
 
33
 
34
  def interpret_gesture(landmarks):
35
  if landmarks[8].y < landmarks[6].y and landmarks[12].y > landmarks[10].y:
@@ -79,57 +82,95 @@ def send_to_gemini(drawing_canvas):
79
  response = llm.invoke([message]).content
80
  return response
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
def main():
    """Run the virtual math calculator.

    Captures webcam frames, tracks hand landmarks with MediaPipe, lets the
    user draw strokes via gestures, and on a submit gesture rasterizes the
    strokes, saves them to ``drawing.png``, and asks Gemini to solve the
    drawn expression. Results are shown in a Streamlit text area.
    """
    st.title("Virtual Math Calculator")

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        # Fail fast with a visible message instead of looping on dead reads.
        st.error("Could not open the webcam")
        return

    # Placeholders so the feed and result update in place on each frame.
    webcam_placeholder = st.empty()
    result_placeholder = st.empty()

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                st.error("Failed to capture frame from camera")
                break

            # Mirror the frame so on-screen motion matches the user's hand.
            frame = cv2.flip(frame, 1)
            # MediaPipe expects RGB; OpenCV delivers BGR.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            result = hands.process(rgb_frame)

            if result.multi_hand_landmarks:
                for hand_landmarks in result.multi_hand_landmarks:
                    gesture = interpret_gesture(hand_landmarks.landmark)
                    submit = handle_gesture(gesture, hand_landmarks, frame)
                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    if submit:
                        # Rasterize the accumulated strokes onto a blank canvas.
                        drawing_canvas = np.zeros((600, 800, 3), dtype=np.uint8)
                        for stroke in st.session_state.points:
                            for i in range(1, len(stroke)):
                                cv2.line(drawing_canvas, stroke[i - 1], stroke[i], (255, 255, 255), 5)

                        # Persist the drawing and hand it to Gemini.
                        cv2.imwrite('drawing.png', drawing_canvas)
                        response = send_to_gemini('drawing.png')
                        result_placeholder.text_area("Result:", value=response, height=300)

            # Overlay the in-progress strokes on the live feed.
            for stroke in st.session_state.points:
                for i in range(1, len(stroke)):
                    cv2.line(frame, stroke[i - 1], stroke[i], (0, 255, 0), 5)

            # BUG FIX: `frame` is BGR (OpenCV); declaring channels="RGB" swapped
            # red and blue in the displayed feed.
            webcam_placeholder.image(frame, channels="BGR", use_column_width=True)
            # NOTE: the old `cv2.waitKey(1)` quit-check was removed — with no
            # cv2.imshow window it always returned -1 and could never fire.
    finally:
        # Always release the camera, even if Streamlit interrupts the loop.
        cap.release()


if __name__ == "__main__":
    main()
 
7
  import os
8
  import streamlit as st
9
  from PIL import Image
10
+ import io
11
+ import base64
12
 
13
  # Set up environment variables and configurations
 
14
  genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
15
 
16
  # Set up MediaPipe
 
31
  st.session_state.drawing = False
32
  if 'new_stroke' not in st.session_state:
33
  st.session_state.new_stroke = True
34
+ if 'image' not in st.session_state:
35
+ st.session_state.image = None
36
 
37
  def interpret_gesture(landmarks):
38
  if landmarks[8].y < landmarks[6].y and landmarks[12].y > landmarks[10].y:
 
82
  response = llm.invoke([message]).content
83
  return response
84
 
85
def process_image(image_data):
    """Decode a base64 webcam capture, run gesture detection, and — when a
    submit gesture is detected — send the current drawing to Gemini.

    Args:
        image_data: str, either a full data URL ("data:image/png;base64,...")
            or a bare base64 payload.

    Returns:
        The Gemini response text when a submit gesture was detected,
        otherwise the string "No gesture detected".
    """
    # BUG FIX: the old `image_data.split(',')[1]` raised IndexError on a bare
    # base64 string with no data-URL prefix. partition() handles both forms.
    _, _, payload = image_data.partition(',')
    image_bytes = base64.b64decode(payload or image_data)
    image = Image.open(io.BytesIO(image_bytes))

    # Keep a copy on disk for inspection/debugging.
    image.save('captured_image.png')

    # PIL gives RGB; the OpenCV drawing utilities below expect BGR.
    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

    # MediaPipe expects RGB input.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = hands.process(rgb_frame)

    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            gesture = interpret_gesture(hand_landmarks.landmark)
            submit = handle_gesture(gesture, hand_landmarks, frame)
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            if submit:
                # Rasterize the accumulated strokes onto a blank canvas.
                drawing_canvas = np.zeros((600, 800, 3), dtype=np.uint8)
                for stroke in st.session_state.points:
                    for i in range(1, len(stroke)):
                        cv2.line(drawing_canvas, stroke[i - 1], stroke[i], (255, 255, 255), 5)

                # Persist the drawing and hand it to Gemini.
                cv2.imwrite('drawing.png', drawing_canvas)
                return send_to_gemini('drawing.png')

    return "No gesture detected"
+
122
def main():
    """Streamlit entry point: capture a webcam photo and run it through
    process_image(), displaying the model's answer."""
    st.title("Virtual Math Calculator")

    # BUG FIX: the previous version injected a <script> block via st.markdown
    # with unsafe_allow_html=True — Streamlit never executes injected scripts,
    # and the JS fetch() POST to '/' had no server-side handler, so
    # st.session_state.image could never be populated. The built-in
    # st.camera_input widget actually returns the capture to Python.
    photo = st.camera_input("Capture Image")
    if photo is not None:
        # Re-encode as a data URL so process_image's contract is unchanged.
        encoded = base64.b64encode(photo.getvalue()).decode('ascii')
        st.session_state.image = f"data:image/png;base64,{encoded}"

    # Process the captured image held in session state, if any.
    if st.session_state.image:
        response = process_image(st.session_state.image)
        st.write(response)


if __name__ == "__main__":
    main()