Spaces:

nabeelarain713
/

virtual_math

Build error

App Files Files Community

nabeelarain713 committed on Sep 1, 2024

Commit

2ccc12d

verified ·

1 Parent(s): a4f17a5

init

Browse files

Files changed (1) hide show

app.py +135 -0

app.py ADDED Viewed

	@@ -0,0 +1,135 @@

+import cv2
+import mediapipe as mp
+import numpy as np
+import google.generativeai as genai
+from langchain_core.messages import HumanMessage
+from langchain_google_genai import ChatGoogleGenerativeAI
+import os
+import streamlit as st
+from PIL import Image
+# Set up environment variables and configurations
+os.environ['GOOGLE_API_KEY'] = st.secrets["AIzaSyCF40XG8X8DJBlbW4-gCeNsoJtURwuwhTw"]
+genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
+# Set up MediaPipe
+mp_hands = mp.solutions.hands
+mp_drawing = mp.solutions.drawing_utils
+hands = mp_hands.Hands()
+# Define gesture constants
+GESTURE_DRAW = 1
+GESTURE_NAVIGATE = 2
+GESTURE_RESET = 3
+GESTURE_SUBMIT = 4
+# Initialize session state
+if 'points' not in st.session_state:
+    st.session_state.points = []
+if 'drawing' not in st.session_state:
+    st.session_state.drawing = False
+if 'new_stroke' not in st.session_state:
+    st.session_state.new_stroke = True
+def interpret_gesture(landmarks):
+    if landmarks[8].y < landmarks[6].y and landmarks[12].y > landmarks[10].y:
+        return GESTURE_DRAW
+    if landmarks[8].y < landmarks[6].y and landmarks[12].y < landmarks[10].y:
+        return GESTURE_NAVIGATE
+    if landmarks[4].y < landmarks[3].y < landmarks[2].y and all(
+        landmarks[i].y > landmarks[3].y for i in range(5, 21)
+    ):
+        return GESTURE_RESET
+    if landmarks[20].y < landmarks[18].y:
+        return GESTURE_SUBMIT
+    return None
+def handle_gesture(gesture, hand_landmarks, frame):
+    if gesture == GESTURE_DRAW:
+        x, y = int(hand_landmarks.landmark[8].x * frame.shape[1]), int(hand_landmarks.landmark[8].y * frame.shape[0])
+        if st.session_state.new_stroke:
+            st.session_state.points.append([])
+            st.session_state.new_stroke = False
+        st.session_state.points[-1].append((x, y))
+        st.session_state.drawing = True
+    else:
+        st.session_state.drawing = False
+        st.session_state.new_stroke = True
+    if gesture == GESTURE_RESET:
+        st.session_state.drawing = False
+        st.session_state.points = []
+    if gesture == GESTURE_SUBMIT:
+        st.session_state.drawing = False
+        return True
+    return False
+def send_to_gemini(drawing_canvas):
+    llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest")
+    message = HumanMessage(
+        content=[
+            {
+                "type": "text",
+                "text": "Give me the answer of any mathematical representation in the image with the complete solution, and does not say the image contains etc.",
+            },
+            {"type": "image_url", "image_url": drawing_canvas},
+        ]
+    )
+    response = llm.invoke([message]).content
+    return response
+def main():
+    st.title("Virtual Math Calculator")
+    # Set up the webcam
+    cap = cv2.VideoCapture(0)
+    # Create placeholders for the webcam feed and the result
+    webcam_placeholder = st.empty()
+    result_placeholder = st.empty()
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            st.error("Failed to capture frame from camera")
+            break
+        frame = cv2.flip(frame, 1)
+        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        result = hands.process(rgb_frame)
+        if result.multi_hand_landmarks:
+            for hand_landmarks in result.multi_hand_landmarks:
+                gesture = interpret_gesture(hand_landmarks.landmark)
+                submit = handle_gesture(gesture, hand_landmarks, frame)
+                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
+                if submit:
+                    drawing_canvas = np.zeros((600, 800, 3), dtype=np.uint8)
+                    for stroke in st.session_state.points:
+                        for i in range(1, len(stroke)):
+                            cv2.line(drawing_canvas, stroke[i-1], stroke[i], (255, 255, 255), 5)
+                    # Save the drawing canvas as an image
+                    cv2.imwrite('drawing.png', drawing_canvas)
+                    # Send the image to Gemini and get the response
+                    response = send_to_gemini('drawing.png')
+                    # Display the response
+                    result_placeholder.text_area("Result:", value=response, height=300)
+        for stroke in st.session_state.points:
+            for i in range(1, len(stroke)):
+                cv2.line(frame, stroke[i - 1], stroke[i], (0, 255, 0), 5)
+        webcam_placeholder.image(frame, channels="RGB", use_column_width=True)
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+    cap.release()
+if __name__ == "__main__":
+    main()