AamirMalik commited on
Commit
dd45714
·
verified ·
1 Parent(s): 9b37ede

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -98
app.py CHANGED
@@ -1,92 +1,33 @@
1
  import streamlit as st
2
- import numpy as np
 
3
  from PIL import Image
4
- from transformers import AutoImageProcessor
 
5
  import cv2
6
  import time
7
- import torch
8
- import requests
9
- import json
10
- import os
11
-
12
- # Groq API Configuration
13
- GROQ_API_KEY = os.getenv("HF_GROQ_API_KEY") # Fetch key from Hugging Face secrets
14
- GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
15
-
16
- # Load processor
17
- processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
18
 
19
- # Placeholder sign labels
20
- sign_labels = {
21
- 0: "Hello",
22
- 1: "Thank You",
23
- 2: "Yes",
24
- 3: "No",
25
- 4: "Please"
26
- }
27
 
28
- # Function to classify sign and refine using Groq API
29
  def classify_sign(image):
30
  image = image.convert("RGB")
31
  inputs = processor(images=image, return_tensors="pt")
32
- prediction = inputs['pixel_values'].argmax().item()
33
- gesture = sign_labels.get(prediction % len(sign_labels), "Unknown Sign")
 
 
34
 
35
- if GROQ_API_KEY:
36
- response = requests.post(
37
- GROQ_API_URL,
38
- headers={
39
- "Content-Type": "application/json",
40
- "Authorization": f"Bearer {GROQ_API_KEY}"
41
- },
42
- json={
43
- "model": "llama-3.3-70b-versatile",
44
- "messages": [{"role": "user", "content": f"Refine this detected sign: {gesture}"}]
45
- }
46
- )
47
-
48
- if response.status_code == 200:
49
- return response.json()['choices'][0]['message']['content']
50
-
51
- return gesture
52
-
53
- # Function to generate sign video from text
54
- # Function to generate sign video from text
55
- def generate_sign_video(text):
56
- if GROQ_API_KEY:
57
- response = requests.post(
58
- GROQ_API_URL,
59
- headers={
60
- "Content-Type": "application/json",
61
- "Authorization": f"Bearer {GROQ_API_KEY}"
62
- },
63
- json={
64
- "model": "llama-3.3-70b-versatile",
65
- "messages": [{"role": "user", "content": f"Generate sign language video for: {text}"}]
66
- }
67
- )
68
-
69
- if response.status_code == 200:
70
- return "https://www.w3schools.com/html/mov_bbb.mp4" # Open-source sample video
71
-
72
- return "https://www.w3schools.com/html/mov_bbb.mp4" # Fallback video URL
73
-
74
-
75
-
76
  # Streamlit UI
77
-
78
  def main():
79
- st.set_page_config(page_title="Sign Language Translator", layout="wide")
80
- st.markdown("<h1 style='text-align: center; font-size: 40px; font-weight: bold; color: #4CAF50;'>🤟 Sign Language Translator</h1>", unsafe_allow_html=True)
81
 
82
- tab1, tab2, tab3, tab4 = st.tabs([
83
- "📸 **Image Load**",
84
- "📷 **Take Picture**",
85
- "🎥 **Live**",
86
- "πŸ“ **Text2Sign**"
87
- ])
88
 
89
  with tab1:
 
90
  uploaded_image = st.file_uploader("Upload an image of a hand gesture", type=["png", "jpg", "jpeg"])
91
  if uploaded_image:
92
  image = Image.open(uploaded_image)
@@ -95,6 +36,7 @@ def main():
95
  st.success(f"Detected Gesture: {gesture}")
96
 
97
  with tab2:
 
98
  camera_image = st.camera_input("Take a picture")
99
  if camera_image:
100
  image = Image.open(camera_image)
@@ -103,7 +45,8 @@ def main():
103
  st.success(f"Detected Gesture: {gesture}")
104
 
105
  with tab3:
106
- if st.button("Enable Cam", key="enable_cam"):
 
107
  cap = cv2.VideoCapture(0)
108
  stframe = st.image([])
109
 
@@ -113,37 +56,18 @@ def main():
113
  break
114
  image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
115
  gesture = classify_sign(image)
116
- frame = cv2.putText(frame, f"Detected Gesture: {gesture}", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
117
  stframe.image(frame, channels="BGR", use_container_width=True)
118
  time.sleep(5)
119
  cap.release()
120
 
121
  with tab4:
 
122
  text_input = st.text_area("Enter text to generate sign language (Max 200 characters)", max_chars=200)
123
  if st.button("Generate Sign"):
124
  if text_input:
125
- video_url = generate_sign_video(text_input)
126
- if video_url:
127
- st.video(video_url)
128
- else:
129
- st.error("Failed to generate sign language video.")
130
  else:
131
  st.warning("Please enter some text.")
132
 
133
- with st.sidebar:
134
- st.markdown("<h2 style='font-size:28px; font-weight: bold; color: #4CAF50;'>Menu</h2>", unsafe_allow_html=True)
135
- if st.button("📖 About Us", use_container_width=True):
136
- st.markdown("We are team SignAI. We leverage advanced AI and Groq technology to interpret sign language gestures, making communication more accessible.")
137
- if st.button("📞 Contact Us", use_container_width=True):
138
- st.markdown("""
139
- Phone: +123 456 7890
140
- LinkedIn: [SignAI](#)
141
- Facebook: [SignAI](#)
142
- Email: info@signai.com
143
- Instagram: [@signai_official](#)
144
- """)
145
- if st.button("💬 Feedback", use_container_width=True):
146
- st.text_area("We value your feedback! Please share your thoughts below:")
147
-
148
- if __name__ == "__main__":
149
- main()
 
1
  import streamlit as st
2
+ import requests
3
+ import os
4
  from PIL import Image
5
+ from transformers import AutoImageProcessor, AutoModelForImageClassification
6
+ import torch
7
  import cv2
8
  import time
 
 
 
 
 
 
 
 
 
 
 
9
 
10
+ # Load the improved Hugging Face model
11
+ processor = AutoImageProcessor.from_pretrained("nateraw/gesture-classification")
12
+ model = AutoModelForImageClassification.from_pretrained("nateraw/gesture-classification")
 
 
 
 
 
13
 
14
+ # Function for sign classification
15
  def classify_sign(image):
16
  image = image.convert("RGB")
17
  inputs = processor(images=image, return_tensors="pt")
18
+ outputs = model(**inputs)
19
+ prediction = torch.argmax(outputs.logits, dim=-1).item()
20
+ labels = ["Hello", "Thank You", "Yes", "No", "Please"] # Update with the actual model labels
21
+ return labels[prediction % len(labels)]
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  # Streamlit UI
 
24
  def main():
25
+ st.title("Sign Language Translator")
 
26
 
27
+ tab1, tab2, tab3, tab4 = st.tabs(["Image Load", "Take Picture", "Live", "Text2Sign"])
 
 
 
 
 
28
 
29
  with tab1:
30
+ st.subheader("📸 Image Load")
31
  uploaded_image = st.file_uploader("Upload an image of a hand gesture", type=["png", "jpg", "jpeg"])
32
  if uploaded_image:
33
  image = Image.open(uploaded_image)
 
36
  st.success(f"Detected Gesture: {gesture}")
37
 
38
  with tab2:
39
+ st.subheader("📷 Take Picture")
40
  camera_image = st.camera_input("Take a picture")
41
  if camera_image:
42
  image = Image.open(camera_image)
 
45
  st.success(f"Detected Gesture: {gesture}")
46
 
47
  with tab3:
48
+ st.subheader("📹 Live")
49
+ if st.button("Enable Cam"):
50
  cap = cv2.VideoCapture(0)
51
  stframe = st.image([])
52
 
 
56
  break
57
  image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
58
  gesture = classify_sign(image)
59
+ frame = cv2.putText(frame, f"Gesture: {gesture}", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
60
  stframe.image(frame, channels="BGR", use_container_width=True)
61
  time.sleep(5)
62
  cap.release()
63
 
64
  with tab4:
65
+ st.subheader("📝 Text2Sign")
66
  text_input = st.text_area("Enter text to generate sign language (Max 200 characters)", max_chars=200)
67
  if st.button("Generate Sign"):
68
  if text_input:
69
+ st.video("https://www.w3schools.com/html/mov_bbb.mp4") # Placeholder URL
 
 
 
 
70
  else:
71
  st.warning("Please enter some text.")
72
 
73
+ if __name__ == "__main__":