Devubiodee committed on
Commit
5585cc9
·
verified ·
1 Parent(s): 6cbf652

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -89
app.py CHANGED
@@ -1,94 +1,18 @@
1
- # app.py - Speech to ASL Avatar on Hugging Face Spaces
2
-
3
  import gradio as gr
4
- import whisper
5
- import requests
6
- import tempfile
7
- import os
8
-
9
- # Load API key from HF Space secrets (set in Settings → Secrets)
10
- API_KEY = os.environ.get("SIGN_SPEAK_API_KEY")
11
- if not API_KEY:
12
- raise ValueError("SIGN_SPEAK_API_KEY not set in Space secrets!")
13
-
14
- BASE_URL = "https://api.sign-speak.com"
15
- PRODUCE_SIGN_URL = f"{BASE_URL}/produce-sign"
16
-
17
- def get_sign_language(text: str, request_class="BLOCKING", identity="MALE"):
18
- headers = {
19
- "X-api-key": API_KEY,
20
- "Content-Type": "application/json"
21
- }
22
- payload = {
23
- "english": text.strip(),
24
- "request_class": request_class.upper(),
25
- "identity": identity.upper(),
26
- # Optional: add "model_version": "SLP.2.xs" for smaller/faster if needed
27
- }
28
- response = requests.post(PRODUCE_SIGN_URL, json=payload, headers=headers)
29
-
30
- if response.status_code == 200:
31
- # Save MP4 bytes to temporary file (Gradio Video needs filepath)
32
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
33
- tmp.write(response.content)
34
- return tmp.name
35
- elif response.status_code == 202:
36
- data = response.json()
37
- batch_id = data.get("batch_id")
38
- raise ValueError(f"Batch processing started (ID: {batch_id}). Video will be ready later – check logs or add polling.")
39
- else:
40
- raise ValueError(f"Sign-Speak API error {response.status_code}: {response.text}")
41
 
42
- def transcribe_and_translate(audio_filepath):
43
- if audio_filepath is None:
44
- return "No audio recorded.", None
45
-
46
- try:
47
- # Load Whisper model (use "base" or "small" if "medium" is too slow on CPU)
48
- model = whisper.load_model("small")
49
-
50
- # Transcribe
51
- result = model.transcribe(audio_filepath, language="en")
52
- text = result["text"].strip()
53
-
54
- if not text:
55
- return "No speech detected in the recording.", None
56
-
57
- # Get ASL avatar video
58
- video_path = get_sign_language(text)
59
-
60
- return f"Transcribed: \"{text}\"", video_path
61
-
62
- except Exception as e:
63
- return f"Error: {str(e)}", None
64
 
65
- # Gradio UI
66
- with gr.Blocks(title="Speech → ASL Avatar Translator") as demo:
67
- gr.Markdown("""
68
- # Speech to ASL Avatar
69
- 1. Record your voice using the microphone below
70
- 2. Click **Translate**
71
- 3. Whisper transcribes → Sign-Speak generates ASL signing video
72
- """)
73
-
74
- with gr.Row():
75
- audio_input = gr.Audio(
76
- sources=["microphone"], # ← Fixed: "sources" (list), not "source"
77
- type="filepath",
78
- label="Speak here (click record)",
79
- format="wav" # Helps Whisper compatibility
80
- )
81
- submit_btn = gr.Button("Translate", variant="primary")
82
-
83
- transcript_output = gr.Textbox(label="Transcribed Text / Status", lines=3)
84
- video_output = gr.Video(label="ASL Avatar Signing Video", autoplay=True)
85
-
86
- # Wire up the button
87
- submit_btn.click(
88
- fn=transcribe_and_translate,
89
- inputs=audio_input,
90
- outputs=[transcript_output, video_output]
91
- )
92
 
93
- # Launch (HF Spaces ignores server_name/port)
94
  demo.launch()
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
def greet(audio):
    """Report the duration of a clip recorded through the Gradio audio input.

    Args:
        audio: A falsy value (e.g. ``None``) when nothing was recorded;
            otherwise the ``(sample_rate, samples)`` pair that ``gr.Audio``
            delivers when configured with ``type="numpy"``.

    Returns:
        A status string stating the clip length in seconds.
    """
    if not audio:
        return "Audio received! (length: 0 seconds)"

    # len() of the (sample_rate, samples) pair itself is always 2, so the
    # duration has to be derived from the sample count and the sample rate.
    sample_rate, samples = audio
    duration = len(samples) / sample_rate if sample_rate else 0.0
    return "Audio received! (length: {:.2f} seconds)".format(duration)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
# Microphone-only recorder used as the single input of the test app.
mic_input = gr.Audio(
    sources=["microphone"],
    type="numpy",  # or "filepath"
    label="Record something",
    format="wav",
)

# Minimal one-input / one-output interface: the recording goes to greet()
# and the returned status string is shown as plain text.
demo = gr.Interface(
    fn=greet,
    inputs=mic_input,
    outputs="text",
    title="Mic Test",
)

demo.launch()