Spaces:

lalaru
/

Translation-code

Sleeping

App Files Files Community

lalaru committed on Sep 10, 2025

Commit

0fc2295

verified ·

1 Parent(s): 16af00e

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -12

app.py CHANGED Viewed

@@ -2,10 +2,16 @@ import gradio as gr
 import requests
 import json
 import os
-# Hugging Face API details
 API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
-HF_TOKEN = os.getenv("HF_TOKEN")  # make sure you set this in Hugging Face "Repository secrets"
 headers = {"Authorization": f"Bearer {HF_TOKEN}"}
 # Prompt template
@@ -20,17 +26,17 @@ Your tasks:
 Return only JSON in this exact format:
-{
   "cleaned_text": "<cleaned input text in original language>",
   "translated_text": "<translation in target language>",
   "emotion": "<given emotion>"
-}
 Input:
-{text}
-Source language: {source_lang}
-Target language: {target_lang}
-Emotion: {emotion}
 """
 def query_hf(payload):
@@ -71,18 +77,29 @@ def translate(text, source_lang, target_lang, emotion):
 # Gradio UI with speech input
 def gradio_interface(audio, text, source_lang, target_lang, emotion):
-    # If user spoke, convert audio → text using Gradio’s built-in STT
     if audio is not None:
-        text = audio  # Gradio’s Speech-to-Text returns text directly
     if not text:
-        return {"error": "No input text provided"}
     result = translate(text, source_lang, target_lang, emotion)
     return json.dumps(result, indent=2, ensure_ascii=False)
 iface = gr.Interface(
     fn=gradio_interface,
     inputs=[
-        gr.Audio(sources=["microphone"], type="text", label="🎙 Speech Input (or leave empty)"),
         gr.Textbox(label="💬 Text Input"),
         gr.Radio(["en", "es"], label="Source Language"),
         gr.Radio(["en", "es"], label="Target Language"),

 import requests
 import json
 import os
+from transformers import pipeline
+# Load a speech-to-text model
+# Using a smaller, English-only model ('distil-whisper/distil-small.en') for efficiency
+# You may need to run 'pip install transformers torch soundfile librosa' first
+asr_pipeline = pipeline("automatic-speech-recognition", model="distil-whisper/distil-small.en")
+# Hugging Face API details for Mistral
 API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
+HF_TOKEN = os.getenv("HF_TOKEN")
 headers = {"Authorization": f"Bearer {HF_TOKEN}"}
 # Prompt template
 Return only JSON in this exact format:
+{{
   "cleaned_text": "<cleaned input text in original language>",
   "translated_text": "<translation in target language>",
   "emotion": "<given emotion>"
+}}
 Input:
+{{text}}
+Source language: {{source_lang}}
+Target language: {{target_lang}}
+Emotion: {{emotion}}
 """
 def query_hf(payload):
 # Gradio UI with speech input
 def gradio_interface(audio, text, source_lang, target_lang, emotion):
+    # If audio is provided, transcribe it to text
     if audio is not None:
+        try:
+            # Transcribe the audio file using the ASR pipeline
+            transcribed_text = asr_pipeline(audio)["text"]
+            # If there's also text input, combine them
+            if text:
+                text = transcribed_text + " " + text
+            else:
+                text = transcribed_text
+        except Exception as e:
+            return json.dumps({"error": f"Speech-to-text transcription failed: {e}"}, indent=2, ensure_ascii=False)
     if not text:
+        return json.dumps({"error": "No input text provided"}, indent=2, ensure_ascii=False)
     result = translate(text, source_lang, target_lang, emotion)
     return json.dumps(result, indent=2, ensure_ascii=False)
 iface = gr.Interface(
     fn=gradio_interface,
     inputs=[
+        gr.Audio(sources=["microphone"], type="filepath", label="🎙 Speech Input (or leave empty)"),
         gr.Textbox(label="💬 Text Input"),
         gr.Radio(["en", "es"], label="Source Language"),
         gr.Radio(["en", "es"], label="Target Language"),