HuggingFace-SK committed
Commit 717217e · 1 Parent(s): bb63de4

unify android and web html

Files changed (2):
  1. main.py +1 -1
  2. templates/browser-detect.html +102 -27
main.py CHANGED
@@ -11,4 +11,4 @@ def send_report():
     return send_from_directory("better_exported", "model.tflite")
 
 if (__name__ == '__main__'):
-    app.run( host='0.0.0.0', port=7860)
+    app.run( host='0.0.0.0', port=7860)
templates/browser-detect.html CHANGED
@@ -3,7 +3,7 @@
 
 <head></head>
 <meta charset="UTF-8">
-
+<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=0" />
 
 <title>Sign Language Interpreter</title>
 
@@ -11,24 +11,39 @@
 <script>
     window.console = window.console || function (t) { };
 </script>
-
+<!-- For Android
+<link rel="stylesheet" type="text/css" href="http://127.0.0.1:8125/assets/static/browser_detect.css" />
+-->
+<!-- For Web -->
 <link rel="stylesheet" type="text/css" href="static/browser_detect.css" />
 
 
 </head>
 
 <body translate="no">
+    <!-- For Android
+    <script src="../assets/ipc/androidjs.js"></script>
+    <script src="http://127.0.0.1:8125/assets/static/drawing_utils.js" crossorigin="anonymous"></script>
+    <script src="http://127.0.0.1:8125/assets/static/hands.js" crossorigin="anonymous"></script>
+    <script src="http://127.0.0.1:8125/assets/static/tfjs-core"></script>
+    <script src="http://127.0.0.1:8125/assets/static/tfjs-backend-cpu"></script>
+    <script src="http://127.0.0.1:8125/assets/static/tf-tflite.min.js"></script>
+    <script src="http://127.0.0.1:8125/assets/static/vision_wasm_internal.js" crossorigin="anonymous"></script>
+    -->
+
+    <!-- For Web -->
     <script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/drawing_utils.js"
        crossorigin="anonymous"></script>
     <script src="https://cdn.jsdelivr.net/npm/@mediapipe/hands/hands.js" crossorigin="anonymous"></script>
     <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-core"></script>
     <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-cpu"></script>
     <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-tflite/dist/tf-tflite.min.js"></script>
-
+
+
 
     <div class="container">
 
-        <video id="webcam" style="display:none" autoplay="" playsinline=""></video>
+        <video id="webcam" style="display:none" autoplay playsinline muted></video>
         <div class="canvas_wrapper" id="canvas_wrapper">
             <button id="switch-camera" style="display:none; position: absolute; top:10px; left:10px; padding:5px; height:40px; width:40px; text-align: center; border-radius: 12.25px; font-size: 20px; font-weight: 900; border:none; background-color: #f2f2f2; color:black;
                 box-shadow: 0px 4px 20px 4px rgba(0, 0, 0, 0.38); z-index:100">
@@ -53,13 +68,47 @@
                 <span>Listen 🔊</span>
             </button>
 
+            <audio id="audioPlayer">-</audio>
+        </div>
+        <div id="logUI">
+
         </div>
         <center>
             <script>
+                var speechSupported = true
+                var prevSpeech = ""
+
+                logUI = document.getElementById("logUI")
+
+                function logMessage(msg) {
+                    const span = document.createElement('span');
+                    span.textContent = msg;
+                    logUI.appendChild(span);
+                    logUI.appendChild(document.createElement('br')); // Add a line break
+                }
+
+                const originalFetch = window.fetch;
+
+                // Override the fetch function
+                window.fetch = async function (input, init) {
+                    // Convert input to URL if it's a Request object
+                    const url = typeof input === 'string' ? input : input.url;
+                    var newUrl = url
+                    if (url == 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.0/wasm/vision_wasm_internal.wasm') {
+                        // newUrl = 'http://127.0.0.1:8125/assets/static/vision_wasm_internal.wasm' // For Android
+                        newUrl = 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.0/wasm/vision_wasm_internal.wasm' // For Web
+
+                    }
+                    console.log("This was FETCHED: ", newUrl)
+                    // Call the original fetch function with the new URL
+                    return originalFetch(newUrl, init);
+                };
+
+
                 var synthesis = window.speechSynthesis;
 
                 if ('speechSynthesis' in window) {
-
+
                     var synthesis = window.speechSynthesis;
 
                     // Get the first `en` language voice in the list
@@ -70,24 +119,42 @@
                     // Create an utterance object
 
                 } else {
+                    speechSupported = false;
                     console.log('Text-to-speech not supported.');
                 }
 
                 function speak(text) {
+                    console.log("speech api support", speechSupported)
+                    console.log("condition: ", !speechSupported)
+                    console.log("condition2: ", speechSupported == false)
+                    if (!speechSupported) {
+                        console.log("speech api support", speechSupported)
+                        const audioPlayer = document.getElementById('audioPlayer');
+                        if (prevSpeech != text) {
+                            prevSpeech = text
+                            audioPlayer.src = 'http://127.0.0.1:8125/speech?t=' + text; // Set the audio source
+                            console.log("Set src: ", audioPlayer.src)
+                        }
 
-                    if ('speechSynthesis' in window) {
-
-                        // Speak the utterance
-                        var utterance = new SpeechSynthesisUtterance(text);
+                        audioPlayer.play() // Play the audio
+                            .then(() => {
 
-                        // Set utterance properties
-                        utterance.voice = voice;
-                        utterance.pitch = 0.6;
-                        utterance.rate = 0.8;
-                        utterance.volume = 0.8;
+                                console.log('Audio is playing');
+                            })
+                            .catch(error => {
+                                console.error('Error playing audio:', error);
+                                prevSpeech = ''
+                            });
+                    } else
+                        if ('speechSynthesis' in window) {
+                            var utterance = new SpeechSynthesisUtterance(text);
+                            utterance.voice = voice;
+                            utterance.pitch = 0.6;
+                            utterance.rate = 0.8;
+                            utterance.volume = 0.8;
                             synthesis.speak(utterance);
                         } else {
-                            console.log('Text-to-speech not supported.');
+                            console.log("Text to speech is now not supported")
                         }
                 }
                 var word_list = []
@@ -103,8 +170,9 @@
 
     <script type="module">
 
-
-        import { HandLandmarker, FilesetResolver } from "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.0";
+
+        //import { HandLandmarker, FilesetResolver } from "http://127.0.0.1:8125/assets/static/tasks-vision@0.10.0" // For Android
+        import { HandLandmarker, FilesetResolver } from "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.0"; // For Web
         let handLandmarker = undefined;
         let runningMode = "IMAGE";
         let enableWebcamButton;
@@ -116,10 +184,11 @@
         // loading. Machine Learning models can be large and take a moment to
         // get everything needed to run.
         const createHandLandmarker = async () => {
-            const vision = await FilesetResolver.forVisionTasks("https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.0/wasm");
+            const vision = await FilesetResolver.forVisionTasks("https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.0/wasm"); // This doesnt really matter as this is already imported somewhere else, and the code runs fine without the request
             handLandmarker = await HandLandmarker.createFromOptions(vision, {
                 baseOptions: {
-                    modelAssetPath: `https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task`,
+                    // modelAssetPath: `http://127.0.0.1:8125/assets/static/hand_landmarker.task`, // For Android
+                    modelAssetPath: `https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task`, // For Web
                     delegate: "GPU"
                 },
                 runningMode: runningMode,
@@ -128,7 +197,8 @@
         };
         createHandLandmarker();
 
-        const MODEL_PATH = "/exported"
+        // const MODEL_PATH = "http://127.0.0.1:8125/assets/static/model.tflite" // For Android
+        const MODEL_PATH = "/exported" // For Web
         var objectDetector = tflite.loadTFLiteModel(MODEL_PATH);
 
         /********************************************************************
@@ -193,10 +263,15 @@
             }
             };
            // Activate the webcam stream.
-            navigator.mediaDevices.getUserMedia(constraints).then((stream) => {
-                video.srcObject = stream;
-                video.addEventListener("loadeddata", predictWebcam);
-            });
+            navigator.mediaDevices.getUserMedia(constraints)
+                .then((stream) => {
+                    video.srcObject = stream;
+                    video.play();
+                    video.addEventListener("loadeddata", predictWebcam);
+                })
+                .catch((error) => {
+                    console.error("Error accessing the camera: ", error.name, error.message, error.code);
+                });
         }
         let lastVideoTime = -1;
         let results = undefined;
@@ -537,12 +612,12 @@
         }
     </script>
 
+    <script src="https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.0/wasm/vision_wasm_internal.js"
+        crossorigin="anonymous"></script>
 
 
 
-
-    <script src="https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.0/wasm/vision_wasm_internal.js"
-        crossorigin="anonymous"></script>
+
</body>
 
 </html>
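Note that the "unification" here is comment toggling: the same HTML now carries both builds' asset references, with the "For Android" blocks pointing at a local server on http://127.0.0.1:8125 bundled with the app and the "For Web" blocks pointing at the public CDNs, and a build is selected by moving the comments. A minimal sketch of making that selection at runtime instead; IS_ANDROID, the window.androidjs probe, and loadScript are illustrative assumptions, not code from this commit:

// Sketch only: a runtime equivalent of the "For Android" / "For Web" comment blocks.
// Assumption: the Android build exposes a bridge object via ../assets/ipc/androidjs.js.
const IS_ANDROID = typeof window.androidjs !== 'undefined';
const ASSET_BASE = IS_ANDROID
    ? 'http://127.0.0.1:8125/assets/static'   // app-local server, as in the commented Android blocks
    : 'https://cdn.jsdelivr.net/npm';         // public CDN, as in the web blocks

function loadScript(src) {
    // Append a <script> tag and resolve once it has loaded.
    return new Promise((resolve, reject) => {
        const s = document.createElement('script');
        s.src = src;
        s.crossOrigin = 'anonymous';
        s.onload = resolve;
        s.onerror = reject;
        document.body.appendChild(s);
    });
}

// Example: select the right hands.js for the current build.
loadScript(IS_ANDROID ? `${ASSET_BASE}/hands.js` : `${ASSET_BASE}/@mediapipe/hands/hands.js`);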
 
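The window.fetch override exists because tasks-vision downloads vision_wasm_internal.wasm internally at runtime, so no <script>-tag toggle can redirect that request. As committed, it matches one hard-coded URL and, in the web configuration, rewrites it to itself. A sketch generalizing the check into a remap table; everything except the vision_wasm_internal.wasm entry is an assumption:

// Sketch: URL remap table for the fetch override. Only the wasm entry
// below appears in the commit; the table form itself is illustrative.
const REMAP = {
    'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.0/wasm/vision_wasm_internal.wasm':
        'http://127.0.0.1:8125/assets/static/vision_wasm_internal.wasm', // Android-local copy
};

const originalFetch = window.fetch;
window.fetch = async function (input, init) {
    // Requests can arrive as a string URL or a Request object.
    const url = typeof input === 'string' ? input : input.url;
    return originalFetch(REMAP[url] || url, init);
};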
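speak() now branches: SpeechSynthesis where the API exists, otherwise the <audio id="audioPlayer"> element pointed at the local /speech?t= endpoint (the Android WebView path, where speechSynthesis is typically absent). A condensed sketch of the same logic; encodeURIComponent is an addition here, since the committed code concatenates raw text into the query string and would break on characters like & or #:

// Condensed sketch of the committed speak(); speechSupported, prevSpeech,
// synthesis, and voice are the page globals set up above.
function speak(text) {
    if (speechSupported) {
        const utterance = new SpeechSynthesisUtterance(text);
        utterance.voice = voice;
        utterance.pitch = 0.6;
        utterance.rate = 0.8;
        utterance.volume = 0.8;
        synthesis.speak(utterance);
    } else {
        const audioPlayer = document.getElementById('audioPlayer');
        if (prevSpeech !== text) {
            prevSpeech = text;
            audioPlayer.src = 'http://127.0.0.1:8125/speech?t=' + encodeURIComponent(text);
        }
        audioPlayer.play().catch((error) => {
            console.error('Error playing audio:', error);
            prevSpeech = ''; // force a fresh src on the next attempt
        });
    }
}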
 
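tflite.loadTFLiteModel(MODEL_PATH) returns a Promise, and the commit stores that Promise directly in objectDetector, so any caller has to resolve it before running inference. A sketch of that step, assuming a single-input, single-output classifier; the classify helper and the tensor shape are illustrative, not from the commit:

// Sketch: resolving the stored Promise before prediction. tf.* comes from
// the tfjs-core bundle the page already loads.
async function classify(inputValues) {
    const model = await objectDetector;            // TFLiteModel once resolved
    const input = tf.tensor(inputValues, [1, inputValues.length]); // assumed input shape
    const output = model.predict(input);           // single output tensor assumed
    const scores = await output.data();
    input.dispose();
    output.dispose();
    return scores;
}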
 