nabeelarain713 committed on
Commit
2ad8309
·
verified ·
1 Parent(s): ca9dd14
Files changed (1) hide show
  1. app.py +58 -139
app.py CHANGED
@@ -6,67 +6,76 @@ from langchain_core.messages import HumanMessage
6
  from langchain_google_genai import ChatGoogleGenerativeAI
7
  import os
8
  import streamlit as st
9
- from PIL import Image
10
- import io
11
- import base64
12
 
13
  # Set up environment variables and configurations
14
  genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
15
 
16
- # Set up MediaPipe
17
  mp_hands = mp.solutions.hands
18
  mp_drawing = mp.solutions.drawing_utils
19
  hands = mp_hands.Hands()
20
 
21
# Gesture codes returned by interpret_gesture() and compared in
# handle_gesture(); plain ints for cheap equality checks.
# Finger meanings follow the MediaPipe hand-landmark numbering used in
# interpret_gesture() (8=index tip, 12=middle tip, 4=thumb tip, 20=pinky tip).
GESTURE_DRAW = 1      # index finger raised, middle finger lowered
GESTURE_NAVIGATE = 2  # index and middle fingers both raised
GESTURE_RESET = 3     # thumb extended, all other fingers folded
GESTURE_SUBMIT = 4    # pinky finger raised
26
-
27
# Initialize session state shared by the gesture handlers.
if 'points' not in st.session_state:
    # List of strokes; each stroke is a list of (x, y) pixel tuples.
    st.session_state.points = []
if 'drawing' not in st.session_state:
    # True while a draw gesture is actively adding points.
    st.session_state.drawing = False
if 'new_stroke' not in st.session_state:
    # When True, the next draw point starts a fresh stroke.
    st.session_state.new_stroke = True
if 'image' not in st.session_state:
    # Captured webcam frame as a data URL.
    # NOTE(review): nothing in this file assigns this key — confirm how the
    # captured image is supposed to reach session state.
    st.session_state.image = None
36
-
37
def interpret_gesture(landmarks):
    """Map a MediaPipe hand-landmark list to one of the GESTURE_* codes.

    Compares fingertip y-coordinates against lower joints (smaller y is
    higher on screen). Returns GESTURE_DRAW, GESTURE_NAVIGATE,
    GESTURE_RESET, GESTURE_SUBMIT, or None when nothing matches.
    """
    index_raised = landmarks[8].y < landmarks[6].y
    middle_raised = landmarks[12].y < landmarks[10].y
    middle_lowered = landmarks[12].y > landmarks[10].y

    # Index up alone draws; index plus middle navigates.
    if index_raised and middle_lowered:
        return GESTURE_DRAW
    if index_raised and middle_raised:
        return GESTURE_NAVIGATE

    # Thumbs-up: thumb joints strictly ascending, every other finger folded
    # below the thumb's IP joint.
    thumb_extended = landmarks[4].y < landmarks[3].y < landmarks[2].y
    if thumb_extended and all(landmarks[i].y > landmarks[3].y for i in range(5, 21)):
        return GESTURE_RESET

    # Raised pinky submits the drawing.
    if landmarks[20].y < landmarks[18].y:
        return GESTURE_SUBMIT

    return None
49
 
50
def handle_gesture(gesture, hand_landmarks, frame):
    """Apply *gesture* to the shared drawing state in st.session_state.

    DRAW appends the index-fingertip pixel position to the current stroke,
    RESET clears all strokes, SUBMIT stops drawing and signals the caller.
    Returns True only for a submit gesture.
    """
    state = st.session_state

    if gesture == GESTURE_DRAW:
        # Convert the normalized fingertip coordinates to pixels.
        tip = hand_landmarks.landmark[8]
        point = (int(tip.x * frame.shape[1]), int(tip.y * frame.shape[0]))
        if state.new_stroke:
            state.points.append([])
            state.new_stroke = False
        state.points[-1].append(point)
        state.drawing = True
    else:
        # Any non-draw gesture ends the current stroke.
        state.drawing = False
        state.new_stroke = True

    if gesture == GESTURE_RESET:
        state.drawing = False
        state.points = []

    if gesture == GESTURE_SUBMIT:
        state.drawing = False
        return True
    return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  def send_to_gemini(drawing_canvas):
72
  llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest")
@@ -82,95 +91,5 @@ def send_to_gemini(drawing_canvas):
82
  response = llm.invoke([message]).content
83
  return response
84
 
85
def process_image(image_data):
    """Decode a base64 webcam frame, run gesture detection, and on a submit
    gesture render the accumulated strokes and send them to Gemini.

    Parameters
    ----------
    image_data : str
        A data URL of the form "data:image/png;base64,....".

    Returns
    -------
    str
        Gemini's answer when a submit gesture was detected, otherwise
        "No gesture detected".
    """
    # Decode the payload after the "data:...;base64," prefix into a PIL image.
    image_bytes = base64.b64decode(image_data.split(',')[1])
    image = Image.open(io.BytesIO(image_bytes))

    # Keep a copy of the raw capture on disk (debugging aid).
    image.save('captured_image.png')

    # MediaPipe expects RGB while OpenCV works in BGR; convert explicitly.
    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = hands.process(rgb_frame)

    if result.multi_hand_landmarks:
        submit = False
        for hand_landmarks in result.multi_hand_landmarks:
            gesture = interpret_gesture(hand_landmarks.landmark)
            # BUGFIX: accumulate the submit flag across hands — previously a
            # later hand without a submit gesture overwrote an earlier True.
            submit = handle_gesture(gesture, hand_landmarks, frame) or submit
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        if submit:
            # Rasterize the recorded strokes onto a black 800x600 canvas.
            drawing_canvas = np.zeros((600, 800, 3), dtype=np.uint8)
            for stroke in st.session_state.points:
                for i in range(1, len(stroke)):
                    cv2.line(drawing_canvas, stroke[i - 1], stroke[i], (255, 255, 255), 5)

            # Save the drawing canvas as an image and ask Gemini about it.
            cv2.imwrite('drawing.png', drawing_canvas)
            return send_to_gemini('drawing.png')

    # BUGFIX: previously fell off the end and returned None when hands were
    # detected but no submit gesture occurred; always return a message.
    return "No gesture detected"
121
-
122
def main():
    """Streamlit entry point: renders the title and injects webcam-capture JS.

    NOTE(review): the injected script POSTs the captured frame to '/', but a
    plain Streamlit app serves no such endpoint, and nothing in this file ever
    assigns st.session_state.image — so the processing branch at the bottom
    appears unreachable. Also, st.markdown does not execute <script> tags even
    with unsafe_allow_html=True (a components.html embed is usually needed) —
    confirm the intended capture flow.
    """
    st.title("Virtual Math Calculator")

    # HTML & JS for capturing image from the webcam
    st.markdown("""
    <script>
    async function getCameraFeed() {
        const video = document.createElement('video');
        video.style.display = 'none';
        document.body.appendChild(video);

        const stream = await navigator.mediaDevices.getUserMedia({ video: true });
        video.srcObject = stream;

        return new Promise((resolve) => {
            video.onloadedmetadata = () => {
                video.play();
                const canvas = document.createElement('canvas');
                canvas.width = video.videoWidth;
                canvas.height = video.videoHeight;
                const context = canvas.getContext('2d');
                context.drawImage(video, 0, 0, canvas.width, canvas.height);
                resolve(canvas.toDataURL('image/png'));
                stream.getTracks().forEach(track => track.stop());
                document.body.removeChild(video);
            };
        });
    }

    async function captureImage() {
        const image = await getCameraFeed();
        const response = await fetch('/', {
            method: 'POST',
            body: JSON.stringify({ image }),
            headers: {
                'Content-Type': 'application/json'
            }
        });
        const result = await response.json();
        document.getElementById('result').textContent = result.answer;
    }

    </script>

    <button onclick="captureImage()">Capture Image</button>
    <div id="result"></div>
    """, unsafe_allow_html=True)

    # Process captured image from session state
    if st.session_state.image:
        response = process_image(st.session_state.image)
        st.write(response)

# Script entry point.
if __name__ == "__main__":
    main()
 
6
  from langchain_google_genai import ChatGoogleGenerativeAI
7
  import os
8
  import streamlit as st
9
+ from PIL import Image, ImageDraw
 
 
10
 
11
# Set up environment variables and configurations.
# Requires GOOGLE_API_KEY in the environment; os.environ[...] raises KeyError
# at import time if it is missing.
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

# Set up MediaPipe (not used in the current implementation but kept for reference)
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
# Instantiated at import time with default Hands() parameters.
hands = mp_hands.Hands()
 
19
# Initialize session state for drawing: `drawing` tracks whether the user is
# mid-stroke; `points` holds the strokes, each a list of (x, y) tuples.
if 'drawing' not in st.session_state:
    st.session_state.drawing = False
if 'points' not in st.session_state:
    st.session_state.points = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
# Function to draw the recorded strokes onto a PIL canvas.
def draw_on_image(image, points, fill=(255, 255, 255), width=5):
    """Render *points* (a list of strokes) onto *image* in place.

    Parameters
    ----------
    image : PIL.Image.Image
        Canvas to draw on; modified in place via ImageDraw.
    points : list[list[tuple[int, int]]]
        Strokes; each stroke is a list of (x, y) pixel coordinates.
    fill : tuple[int, int, int], optional
        Stroke colour (default white). Generalized from a hard-coded value;
        default preserves the original behavior.
    width : int, optional
        Stroke width in pixels (default 5, as before).
    """
    draw = ImageDraw.Draw(image)
    for stroke in points:
        # A stroke needs at least two points to form a visible segment.
        if len(stroke) > 1:
            draw.line(stroke, fill=fill, width=width)
 
 
 
 
 
31
 
32
def main():
    """Streamlit entry point: drawing canvas on the left, instructions on the right.

    NOTE(review): the mouse-driven drawing flow below calls st.mouse(), which
    is not part of the Streamlit API and will raise AttributeError when this
    line executes; a component such as streamlit-drawable-canvas is probably
    what was intended. Flagged here rather than changed.
    """
    st.title("Virtual Math Calculator")

    col1, col2 = st.columns([3, 1])

    with col1:
        # Create a canvas for drawing
        canvas_placeholder = st.empty()
        result_placeholder = st.empty()
        # Lazily create the 800x600 black canvas once per session.
        if 'canvas_image' not in st.session_state:
            st.session_state.canvas_image = Image.new("RGB", (800, 600), (0, 0, 0))
        if st.button("Reset"):
            # Drop all strokes and start over from a fresh black canvas.
            st.session_state.points = []
            st.session_state.canvas_image = Image.new("RGB", (800, 600), (0, 0, 0))
        if st.button("Submit"):
            # Rasterize the strokes onto a copy, save it, and query Gemini.
            drawing_canvas = st.session_state.canvas_image.copy()
            draw_on_image(drawing_canvas, st.session_state.points)
            drawing_canvas.save('drawing.png')

            # Send the image to Gemini and get the response
            response = send_to_gemini('drawing.png')
            result_placeholder.text_area("Result:", value=response, height=300)

        # Show the canvas with a "Draw here" hint overlaid via OpenCV.
        canvas_image = np.array(st.session_state.canvas_image)
        cv2.putText(canvas_image, "Draw here", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        canvas_placeholder.image(canvas_image)

        # Handle mouse events for drawing
        def update_canvas(image):
            # Append the current cursor position to the active stroke and redraw.
            if st.session_state.drawing:
                current_pos = st.session_state.current_pos
                if st.session_state.new_stroke:
                    st.session_state.points.append([])
                    st.session_state.new_stroke = False
                st.session_state.points[-1].append(current_pos)
                draw_on_image(image, st.session_state.points)

        # NOTE(review): Streamlit buttons are momentary (True only on the rerun
        # in which they were clicked), so `drawing` cannot stay True across
        # reruns — combined with the nonexistent st.mouse above, this input
        # mechanism needs to be reworked; confirm the intended design.
        st.session_state.drawing = st.button("Start Drawing", key="start")
        st.session_state.current_pos = st.mouse("Mouse Position")
        st.session_state.new_stroke = st.button("New Stroke", key="new_stroke")
        update_canvas(st.session_state.canvas_image)

    with col2:
        st.header("Instructions")
        st.write("1. Use the left side to draw your equation.")
        st.write("2. Click **Submit** to process the drawing.")
        st.write("3. The result will be displayed below after submission.")
79
 
80
  def send_to_gemini(drawing_canvas):
81
  llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest")
 
91
  response = llm.invoke([message]).content
92
  return response
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
# Script entry point: run the Streamlit app.
if __name__ == "__main__":
    main()