Devubiodee committed on
Commit
369df34
·
verified ·
1 Parent(s): 540416d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -31
app.py CHANGED
@@ -1,13 +1,15 @@
 
 
1
  import gradio as gr
2
  import whisper
3
  import requests
4
  import tempfile
5
  import os
6
 
7
- # Load your Sign-Speak API key from HF secrets (set in Space settings)
8
  API_KEY = os.environ.get("SIGN_SPEAK_API_KEY")
9
  if not API_KEY:
10
- raise ValueError("Set SIGN_SPEAK_API_KEY in HF Space secrets")
11
 
12
  BASE_URL = "https://api.sign-speak.com"
13
  PRODUCE_SIGN_URL = f"{BASE_URL}/produce-sign"
@@ -21,59 +23,72 @@ def get_sign_language(text: str, request_class="BLOCKING", identity="MALE"):
21
  "english": text.strip(),
22
  "request_class": request_class.upper(),
23
  "identity": identity.upper(),
 
24
  }
25
  response = requests.post(PRODUCE_SIGN_URL, json=payload, headers=headers)
 
26
  if response.status_code == 200:
27
- # Save MP4 bytes to temp file
28
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
29
  tmp.write(response.content)
30
  return tmp.name
31
  elif response.status_code == 202:
32
  data = response.json()
33
  batch_id = data.get("batch_id")
34
- raise ValueError(f"Batch request: Poll /produce-sign/{batch_id} later")
35
  else:
36
- raise ValueError(f"Error {response.status_code}: {response.text}")
37
 
38
- def transcribe_and_translate(audio):
39
- if audio is None:
40
  return "No audio recorded.", None
41
 
42
- # Load Whisper model (medium for accuracy; use 'base' for faster)
43
- model = whisper.load_model("medium")
44
-
45
- # Transcribe audio file
46
- result = model.transcribe(audio, language="en")
47
- text = result["text"].strip()
48
-
49
- if not text:
50
- return "No speech detected.", None
51
-
52
- # Get ASL video from Sign-Speak
53
  try:
 
 
 
 
 
 
 
 
 
 
 
54
  video_path = get_sign_language(text)
55
- return f"Transcribed: '{text}'", video_path
56
- except ValueError as e:
57
- return str(e), None
 
 
58
 
59
- # Gradio interface
60
- with gr.Blocks(title="Speech to ASL Avatar") as demo:
61
  gr.Markdown("""
62
- # Speech → ASL Avatar Translator
63
- Speak into the mic → Whisper transcribes → Sign-Speak generates ASL video.
 
 
64
  """)
65
 
66
  with gr.Row():
67
- audio_input = gr.Audio(source="microphone", type="filepath", label="Record Audio")
68
- submit_btn = gr.Button("Translate")
 
 
 
 
 
69
 
70
- transcript_output = gr.Textbox(label="Transcribed Text / Status")
71
- video_output = gr.Video(label="ASL Avatar Video")
72
 
 
73
  submit_btn.click(
74
- transcribe_and_translate,
75
- inputs=[audio_input],
76
  outputs=[transcript_output, video_output]
77
  )
78
 
 
79
  demo.launch()
 
1
+ # app.py - Speech to ASL Avatar on Hugging Face Spaces
2
+
3
  import gradio as gr
4
  import whisper
5
  import requests
6
  import tempfile
7
  import os
8
 
9
+ # Load API key from HF Space secrets (set in Settings → Secrets)
10
  API_KEY = os.environ.get("SIGN_SPEAK_API_KEY")
11
  if not API_KEY:
12
+ raise ValueError("SIGN_SPEAK_API_KEY not set in Space secrets!")
13
 
14
  BASE_URL = "https://api.sign-speak.com"
15
  PRODUCE_SIGN_URL = f"{BASE_URL}/produce-sign"
 
23
  "english": text.strip(),
24
  "request_class": request_class.upper(),
25
  "identity": identity.upper(),
26
+ # Optional: add "model_version": "SLP.2.xs" for smaller/faster if needed
27
  }
28
  response = requests.post(PRODUCE_SIGN_URL, json=payload, headers=headers)
29
+
30
  if response.status_code == 200:
31
+ # Save MP4 bytes to temporary file (Gradio Video needs filepath)
32
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
33
  tmp.write(response.content)
34
  return tmp.name
35
  elif response.status_code == 202:
36
  data = response.json()
37
  batch_id = data.get("batch_id")
38
+ raise ValueError(f"Batch processing started (ID: {batch_id}). Video will be ready later – check logs or add polling.")
39
  else:
40
+ raise ValueError(f"Sign-Speak API error {response.status_code}: {response.text}")
41
 
42
+ def transcribe_and_translate(audio_filepath):
43
+ if audio_filepath is None:
44
  return "No audio recorded.", None
45
 
 
 
 
 
 
 
 
 
 
 
 
46
  try:
47
+ # Load Whisper model (use "base" or "small" if "medium" is too slow on CPU)
48
+ model = whisper.load_model("medium")
49
+
50
+ # Transcribe
51
+ result = model.transcribe(audio_filepath, language="en")
52
+ text = result["text"].strip()
53
+
54
+ if not text:
55
+ return "No speech detected in the recording.", None
56
+
57
+ # Get ASL avatar video
58
  video_path = get_sign_language(text)
59
+
60
+ return f"Transcribed: \"{text}\"", video_path
61
+
62
+ except Exception as e:
63
+ return f"Error: {str(e)}", None
64
 
65
+ # Gradio UI
66
+ with gr.Blocks(title="Speech → ASL Avatar Translator") as demo:
67
  gr.Markdown("""
68
+ # Speech to ASL Avatar
69
+ 1. Record your voice using the microphone below
70
+ 2. Click **Translate**
71
+ 3. Whisper transcribes → Sign-Speak generates ASL signing video
72
  """)
73
 
74
  with gr.Row():
75
+ audio_input = gr.Audio(
76
+ sources=["microphone"], # ← Fixed: "sources" (list), not "source"
77
+ type="filepath",
78
+ label="Speak here (click record)",
79
+ format="wav" # Helps Whisper compatibility
80
+ )
81
+ submit_btn = gr.Button("Translate", variant="primary")
82
 
83
+ transcript_output = gr.Textbox(label="Transcribed Text / Status", lines=3)
84
+ video_output = gr.Video(label="ASL Avatar Signing Video", autoplay=True)
85
 
86
+ # Wire up the button
87
  submit_btn.click(
88
+ fn=transcribe_and_translate,
89
+ inputs=audio_input,
90
  outputs=[transcript_output, video_output]
91
  )
92
 
93
+ # Launch (HF Spaces ignores server_name/port)
94
  demo.launch()