Update pages/Camera.py
pages/Camera.py  (+35 -13)  CHANGED

@@ -1,34 +1,42 @@
 import logging
 import queue
 from collections import deque
+import json
+import tempfile

 import streamlit as st
 from streamlit_webrtc import WebRtcMode, webrtc_streamer

 from utils import SLInference

+
 logger = logging.getLogger(__name__)

-def display():
+def main():
     """
     Main function of the app.
     """
     config = {
         "path_to_model": "S3D.onnx",
-        "threshold": 0.
+        "threshold": 0.3,
         "topk": 5,
         "path_to_class_list": "RSL_class_list.txt",
-        "window_size":
+        "window_size": 32,
         "provider": "OpenVINOExecutionProvider"
     }

-
+    # Save the configuration to a temporary file
+    with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.json') as config_file:
+        json.dump(config, config_file)
+        config_file_path = config_file.name
+
+    inference_thread = SLInference(config_file_path)
     inference_thread.start()

     webrtc_ctx = webrtc_streamer(
-        key="video-
-        mode=WebRtcMode.
-        media_stream_constraints={"video": True
+        key="video-sendonly",
+        mode=WebRtcMode.SENDONLY,
+        media_stream_constraints={"video": True},
     )

     gestures_deque = deque(maxlen=5)
@@ -38,9 +46,18 @@ def display():
     image_place = st.empty()
     text_output = st.empty()
     last_5_gestures = st.empty()
+    st.markdown(
+        """
+        This application is designed to recognize sign language using a webcam feed.
+        The model has been trained to recognize various sign language gestures and display the corresponding text in real-time.

-    while True:
-        if webrtc_ctx.video_receiver:
+        This demo app is based on code here: https://github.com/ai-forever/easy_sign
+        The project is open for collaboration. If you have any suggestions or want to contribute, please feel free to reach out.
+        """
+    )
+
+    while True:
+        if webrtc_ctx.video_receiver:
             try:
                 video_frame = webrtc_ctx.video_receiver.get_frame(timeout=1)
             except queue.Empty:
@@ -49,7 +66,7 @@ def display():

             img_rgb = video_frame.to_ndarray(format="rgb24")
             image_place.image(img_rgb)
-            inference_thread.input_queue.append(video_frame.reformat(224,224).to_ndarray(format="rgb24"))
+            inference_thread.input_queue.append(video_frame.reformat(224, 224).to_ndarray(format="rgb24"))

             gesture = inference_thread.pred
             if gesture not in ['no', '']:
@@ -58,6 +75,11 @@ def display():
             elif gesture != gestures_deque[-1]:
                 gestures_deque.append(gesture)

-            text_output.markdown(f'<p style="font-size:20px"> Current gesture: {gesture}</p>',
-
-
+            text_output.markdown(f'<p style="font-size:20px"> Current gesture: {gesture}</p>',
+                                 unsafe_allow_html=True)
+            last_5_gestures.markdown(f'<p style="font-size:20px"> Last 5 gestures: {" ".join(gestures_deque)}</p>',
+                                     unsafe_allow_html=True)
+            print(gestures_deque)
+
+if __name__ == "__main__":
+    main()
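
Review note: SLInference is imported from utils and is not touched by this commit. Judging only from its call sites here (constructed with a path to a JSON config, started once, fed 224x224 RGB frames through input_queue, and polled through pred), its interface looks roughly like the sketch below. This is inferred from usage, not the actual utils code; the class body is hypothetical.

import json
import threading
from collections import deque


class SLInference(threading.Thread):
    """Hypothetical shape of the inference worker used by pages/Camera.py."""

    def __init__(self, config_path: str):
        super().__init__(daemon=True)
        with open(config_path) as f:
            self.config = json.load(f)  # the JSON written by main()
        # Camera.py appends 224x224 RGB ndarrays here from the WebRTC loop;
        # window_size bounds how many recent frames the model sees.
        self.input_queue = deque(maxlen=self.config["window_size"])
        # Camera.py polls this attribute; '' or 'no' means nothing recognized.
        self.pred = ""

    def run(self):
        # The real implementation presumably runs the ONNX model over a
        # sliding window of frames and updates self.pred with the top class
        # whose score clears config["threshold"].
        ...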
|
| 1 |
import logging
|
| 2 |
import queue
|
| 3 |
from collections import deque
|
| 4 |
+
import json
|
| 5 |
+
import tempfile
|
| 6 |
|
| 7 |
import streamlit as st
|
| 8 |
from streamlit_webrtc import WebRtcMode, webrtc_streamer
|
| 9 |
|
| 10 |
from utils import SLInference
|
| 11 |
|
| 12 |
+
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
|
| 15 |
+
def main():
|
| 16 |
"""
|
| 17 |
Main function of the app.
|
| 18 |
"""
|
| 19 |
config = {
|
| 20 |
"path_to_model": "S3D.onnx",
|
| 21 |
+
"threshold": 0.3,
|
| 22 |
"topk": 5,
|
| 23 |
"path_to_class_list": "RSL_class_list.txt",
|
| 24 |
+
"window_size": 32,
|
| 25 |
"provider": "OpenVINOExecutionProvider"
|
| 26 |
}
|
| 27 |
|
| 28 |
+
# Сохранение конфигурации во временный файл
|
| 29 |
+
with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.json') as config_file:
|
| 30 |
+
json.dump(config, config_file)
|
| 31 |
+
config_file_path = config_file.name
|
| 32 |
+
|
| 33 |
+
inference_thread = SLInference(config_file_path)
|
| 34 |
inference_thread.start()
|
| 35 |
|
| 36 |
webrtc_ctx = webrtc_streamer(
|
| 37 |
+
key="video-sendonly",
|
| 38 |
+
mode=WebRtcMode.SENDONLY,
|
| 39 |
+
media_stream_constraints={"video": True},
|
| 40 |
)
|
| 41 |
|
| 42 |
gestures_deque = deque(maxlen=5)
|
|
|
|
| 46 |
image_place = st.empty()
|
| 47 |
text_output = st.empty()
|
| 48 |
last_5_gestures = st.empty()
|
| 49 |
+
st.markdown(
|
| 50 |
+
"""
|
| 51 |
+
This application is designed to recognize sign language using a webcam feed.
|
| 52 |
+
The model has been trained to recognize various sign language gestures and display the corresponding text in real-time.
|
| 53 |
|
| 54 |
+
This demo app is based on code here: https://github.com/ai-forever/easy_sign
|
| 55 |
+
The project is open for collaboration. If you have any suggestions or want to contribute, please feel free to reach out.
|
| 56 |
+
"""
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
while True:
|
| 60 |
+
if webrtc_ctx.video_receiver:
|
| 61 |
try:
|
| 62 |
video_frame = webrtc_ctx.video_receiver.get_frame(timeout=1)
|
| 63 |
except queue.Empty:
|
|
|
|
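
Review note: the config is written with delete=False so the file survives the with block for SLInference to read, but nothing removes it afterwards, so every rerun of the page leaves another JSON file in the temp directory. A minimal cleanup variant, assuming SLInference reads the config once in its constructor and never re-opens the path:

import json
import os
import tempfile

with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.json') as config_file:
    json.dump(config, config_file)
    config_file_path = config_file.name

try:
    # Assumes SLInference loads the config during construction.
    inference_thread = SLInference(config_file_path)
finally:
    os.unlink(config_file_path)  # don't accumulate stale config files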
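
Review note: "provider": "OpenVINOExecutionProvider" in the config suggests the S3D model runs through ONNX Runtime with the OpenVINO backend. How SLInference actually builds its session is not shown in this commit; a common defensive pattern for consuming such a key is to check availability first and fall back to the CPU provider, sketched here.

import onnxruntime as ort

def make_session(path_to_model: str, preferred: str) -> ort.InferenceSession:
    # Only request the preferred provider if this onnxruntime build has it;
    # requesting an unavailable provider raises at session creation.
    providers = ["CPUExecutionProvider"]
    if preferred in ort.get_available_providers() and preferred not in providers:
        providers.insert(0, preferred)
    return ort.InferenceSession(path_to_model, providers=providers)

session = make_session("S3D.onnx", "OpenVINOExecutionProvider")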