lalaru committed on
Commit
0fc2295
·
verified ·
1 Parent(s): 16af00e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -12
app.py CHANGED
@@ -2,10 +2,16 @@ import gradio as gr
2
  import requests
3
  import json
4
  import os
 
5
 
6
- # Hugging Face API details
 
 
 
 
 
7
  API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
8
- HF_TOKEN = os.getenv("HF_TOKEN") # make sure you set this in Hugging Face "Repository secrets"
9
  headers = {"Authorization": f"Bearer {HF_TOKEN}"}
10
 
11
  # Prompt template
@@ -20,17 +26,17 @@ Your tasks:
20
 
21
  Return only JSON in this exact format:
22
 
23
- {
24
  "cleaned_text": "<cleaned input text in original language>",
25
  "translated_text": "<translation in target language>",
26
  "emotion": "<given emotion>"
27
- }
28
 
29
  Input:
30
- {text}
31
- Source language: {source_lang}
32
- Target language: {target_lang}
33
- Emotion: {emotion}
34
  """
35
 
36
  def query_hf(payload):
@@ -71,18 +77,29 @@ def translate(text, source_lang, target_lang, emotion):
71
 
72
  # Gradio UI with speech input
73
  def gradio_interface(audio, text, source_lang, target_lang, emotion):
74
- # If user spoke, convert audio text using Gradio’s built-in STT
75
  if audio is not None:
76
- text = audio # Gradio’s Speech-to-Text returns text directly
 
 
 
 
 
 
 
 
 
 
77
  if not text:
78
- return {"error": "No input text provided"}
 
79
  result = translate(text, source_lang, target_lang, emotion)
80
  return json.dumps(result, indent=2, ensure_ascii=False)
81
 
82
  iface = gr.Interface(
83
  fn=gradio_interface,
84
  inputs=[
85
- gr.Audio(sources=["microphone"], type="text", label="🎙 Speech Input (or leave empty)"),
86
  gr.Textbox(label="💬 Text Input"),
87
  gr.Radio(["en", "es"], label="Source Language"),
88
  gr.Radio(["en", "es"], label="Target Language"),
 
2
  import requests
3
  import json
4
  import os
5
+ from transformers import pipeline
6
 
7
+ # Load a speech-to-text model
8
+ # Using a smaller model like 'distil-whisper/distil-small.en' for efficiency
9
+ # You might need to install 'pip install transformers torch soundfile librosa'
10
+ asr_pipeline = pipeline("automatic-speech-recognition", model="distil-whisper/distil-small.en")
11
+
12
+ # Hugging Face API details for Mistral
13
  API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
14
+ HF_TOKEN = os.getenv("HF_TOKEN")
15
  headers = {"Authorization": f"Bearer {HF_TOKEN}"}
16
 
17
  # Prompt template
 
26
 
27
  Return only JSON in this exact format:
28
 
29
+ {{
30
  "cleaned_text": "<cleaned input text in original language>",
31
  "translated_text": "<translation in target language>",
32
  "emotion": "<given emotion>"
33
+ }}
34
 
35
  Input:
36
+ {{text}}
37
+ Source language: {{source_lang}}
38
+ Target language: {{target_lang}}
39
+ Emotion: {{emotion}}
40
  """
41
 
42
  def query_hf(payload):
 
77
 
78
  # Gradio UI with speech input
79
def gradio_interface(audio, text, source_lang, target_lang, emotion):
    """Gradio entry point: optionally transcribe speech, then translate.

    If an audio file path is supplied, it is transcribed via the module-level
    ``asr_pipeline`` and prepended to any typed text. The combined text is
    passed to ``translate``; the result (or an ``{"error": ...}`` payload) is
    returned as a pretty-printed JSON string.
    """
    if audio is not None:
        # Speech-to-text can fail on unreadable/empty recordings; surface
        # that as an error payload rather than crashing the UI.
        try:
            spoken = asr_pipeline(audio)["text"]
        except Exception as e:
            return json.dumps(
                {"error": f"Speech-to-text transcription failed: {e}"},
                indent=2,
                ensure_ascii=False,
            )
        # Spoken input comes first, followed by any typed text.
        text = f"{spoken} {text}" if text else spoken

    if not text:
        return json.dumps(
            {"error": "No input text provided"},
            indent=2,
            ensure_ascii=False,
        )

    return json.dumps(
        translate(text, source_lang, target_lang, emotion),
        indent=2,
        ensure_ascii=False,
    )
98
 
99
  iface = gr.Interface(
100
  fn=gradio_interface,
101
  inputs=[
102
+ gr.Audio(sources=["microphone"], type="filepath", label="🎙 Speech Input (or leave empty)"),
103
  gr.Textbox(label="💬 Text Input"),
104
  gr.Radio(["en", "es"], label="Source Language"),
105
  gr.Radio(["en", "es"], label="Target Language"),