Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,10 +2,16 @@ import gradio as gr
|
|
| 2 |
import requests
|
| 3 |
import json
|
| 4 |
import os
|
|
|
|
| 5 |
|
| 6 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
|
| 8 |
-
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 9 |
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
|
| 10 |
|
| 11 |
# Prompt template
|
|
@@ -20,17 +26,17 @@ Your tasks:
|
|
| 20 |
|
| 21 |
Return only JSON in this exact format:
|
| 22 |
|
| 23 |
-
{
|
| 24 |
"cleaned_text": "<cleaned input text in original language>",
|
| 25 |
"translated_text": "<translation in target language>",
|
| 26 |
"emotion": "<given emotion>"
|
| 27 |
-
}
|
| 28 |
|
| 29 |
Input:
|
| 30 |
-
{text}
|
| 31 |
-
Source language: {source_lang}
|
| 32 |
-
Target language: {target_lang}
|
| 33 |
-
Emotion: {emotion}
|
| 34 |
"""
|
| 35 |
|
| 36 |
def query_hf(payload):
|
|
@@ -71,18 +77,29 @@ def translate(text, source_lang, target_lang, emotion):
|
|
| 71 |
|
| 72 |
# Gradio UI with speech input
|
| 73 |
def gradio_interface(audio, text, source_lang, target_lang, emotion):
|
| 74 |
-
# If
|
| 75 |
if audio is not None:
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
if not text:
|
| 78 |
-
return {"error": "No input text provided"}
|
|
|
|
| 79 |
result = translate(text, source_lang, target_lang, emotion)
|
| 80 |
return json.dumps(result, indent=2, ensure_ascii=False)
|
| 81 |
|
| 82 |
iface = gr.Interface(
|
| 83 |
fn=gradio_interface,
|
| 84 |
inputs=[
|
| 85 |
-
gr.Audio(sources=["microphone"], type="
|
| 86 |
gr.Textbox(label="💬 Text Input"),
|
| 87 |
gr.Radio(["en", "es"], label="Source Language"),
|
| 88 |
gr.Radio(["en", "es"], label="Target Language"),
|
|
|
|
| 2 |
import requests
|
| 3 |
import json
|
| 4 |
import os
|
| 5 |
+
from transformers import pipeline
|
| 6 |
|
| 7 |
+
# Load a speech-to-text model
|
| 8 |
+
# Uses the small 'distil-whisper/distil-small.en' checkpoint for efficiency (NOTE: the '.en' suffix suggests English-only; confirm it suits the 'es' source-language option)
|
| 9 |
+
# You might need to run 'pip install transformers torch soundfile librosa' first
|
| 10 |
+
asr_pipeline = pipeline("automatic-speech-recognition", model="distil-whisper/distil-small.en")
|
| 11 |
+
|
| 12 |
+
# Hugging Face API details for Mistral
|
| 13 |
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
|
| 14 |
+
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 15 |
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
|
| 16 |
|
| 17 |
# Prompt template
|
|
|
|
| 26 |
|
| 27 |
Return only JSON in this exact format:
|
| 28 |
|
| 29 |
+
{{
|
| 30 |
"cleaned_text": "<cleaned input text in original language>",
|
| 31 |
"translated_text": "<translation in target language>",
|
| 32 |
"emotion": "<given emotion>"
|
| 33 |
+
}}
|
| 34 |
|
| 35 |
Input:
|
| 36 |
+
{{text}}
|
| 37 |
+
Source language: {{source_lang}}
|
| 38 |
+
Target language: {{target_lang}}
|
| 39 |
+
Emotion: {{emotion}}
|
| 40 |
"""
|
| 41 |
|
| 42 |
def query_hf(payload):
|
|
|
|
| 77 |
|
| 78 |
# Gradio UI with speech input
|
| 79 |
def gradio_interface(audio, text, source_lang, target_lang, emotion):
    """Translate spoken and/or typed input and return the result as JSON text.

    Args:
        audio: Value of the audio input, handed to ``asr_pipeline`` unchanged;
            ``None`` when nothing was recorded.
        text: Typed input; may be empty, whitespace-only, or ``None``.
        source_lang: Forwarded unchanged to ``translate``.
        target_lang: Forwarded unchanged to ``translate``.
        emotion: Forwarded unchanged to ``translate``.

    Returns:
        A pretty-printed JSON string: either the value returned by
        ``translate`` or an ``{"error": ...}`` object when transcription
        fails or no usable text was supplied.
    """
    # Treat None and whitespace-only input as "nothing typed" so that blank
    # text never reaches the translation call.
    text = (text or "").strip()

    # If audio is provided, transcribe it to text
    if audio is not None:
        try:
            # Transcribe the audio using the ASR pipeline
            transcribed_text = (asr_pipeline(audio)["text"] or "").strip()
        except Exception as e:
            # UI boundary: report the failure to the user instead of crashing.
            return json.dumps(
                {"error": f"Speech-to-text transcription failed: {e}"},
                indent=2,
                ensure_ascii=False,
            )
        # If there's also text input, combine them (transcription first);
        # empty parts are dropped so no stray separator is produced.
        text = " ".join(part for part in (transcribed_text, text) if part)

    if not text:
        return json.dumps(
            {"error": "No input text provided"}, indent=2, ensure_ascii=False
        )

    result = translate(text, source_lang, target_lang, emotion)
    return json.dumps(result, indent=2, ensure_ascii=False)
|
| 98 |
|
| 99 |
iface = gr.Interface(
|
| 100 |
fn=gradio_interface,
|
| 101 |
inputs=[
|
| 102 |
+
gr.Audio(sources=["microphone"], type="filepath", label="🎙 Speech Input (or leave empty)"),
|
| 103 |
gr.Textbox(label="💬 Text Input"),
|
| 104 |
gr.Radio(["en", "es"], label="Source Language"),
|
| 105 |
gr.Radio(["en", "es"], label="Target Language"),
|