shiue2000 committed on
Commit
86936f4
·
verified ·
1 Parent(s): 2263119

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -22
app.py CHANGED
@@ -6,52 +6,50 @@ from openai import OpenAI
6
  # 🔑 Load OpenAI API key from Hugging Face Space secrets
7
  client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
8
 
9
- # 🎧 Load Whisper model once (small = faster; use "base" for lighter)
10
  whisper_model = whisper.load_model("small")
11
 
12
- def transcribe_and_translate(audio_path):
13
  if audio_path is None:
14
- return "No audio detected.", "No translation."
15
 
16
- # Step 1: Transcribe audio → Chinese text
17
  result = whisper_model.transcribe(audio_path, language="zh")
18
  chinese_text = result.get("text", "").strip()
19
 
20
  if not chinese_text:
21
- return "⚠️ Could not transcribe audio.", "No translation."
22
-
23
- # Step 2: Send transcription to ChatGPT for English translation
24
- messages = [
25
- {"role": "system", "content": "You are a professional translator. Translate the following Chinese into fluent, natural English."},
26
- {"role": "user", "content": chinese_text}
27
- ]
28
 
 
29
  try:
30
  response = client.chat.completions.create(
31
- model="gpt-4o-mini", # can use "gpt-4o" for higher quality
32
- messages=messages,
33
- temperature=0
 
 
 
34
  )
35
- english_text = response.choices[0].message.content.strip()
36
  except Exception as e:
37
- english_text = f"⚠️ Translation failed: {e}"
38
 
39
- return chinese_text, english_text
40
 
41
  # 🚀 Gradio UI
42
  app = gr.Interface(
43
- fn=transcribe_and_translate,
44
  inputs=gr.Audio(
45
  sources=["microphone", "upload"],
46
  type="filepath",
47
  label="🎙️ Speak Chinese or Upload Audio"
48
  ),
49
  outputs=[
50
- gr.Textbox(label="🈢 Chinese Transcription"),
51
- gr.Textbox(label="🌍 English Translation (ChatGPT)")
52
  ],
53
- title="🎀 Chinese Voice → English Translator",
54
- description="Speak Chinese or upload audio. Whisper transcribes, then ChatGPT translates into English."
55
  )
56
 
57
  if __name__ == "__main__":
 
6
  # 🔑 Load OpenAI API key from Hugging Face Space secrets
7
  client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
8
 
9
+ # 🎧 Load Whisper model once
10
  whisper_model = whisper.load_model("small")
11
 
12
+ def audio_prompt_to_chatgpt(audio_path):
13
  if audio_path is None:
14
+ return "No audio detected.", ""
15
 
16
+ # 1️⃣ Transcribe audio → Chinese text
17
  result = whisper_model.transcribe(audio_path, language="zh")
18
  chinese_text = result.get("text", "").strip()
19
 
20
  if not chinese_text:
21
+ return "⚠️ Could not transcribe audio.", ""
 
 
 
 
 
 
22
 
23
+ # 2️⃣ Send transcription as a prompt to ChatGPT
24
  try:
25
  response = client.chat.completions.create(
26
+ model="gpt-4o-mini", # or gpt-4o for higher quality
27
+ messages=[
28
+ {"role": "system", "content": "You are a helpful assistant. Follow the user's instructions."},
29
+ {"role": "user", "content": chinese_text}
30
+ ],
31
+ temperature=0.7
32
  )
33
+ chatgpt_reply = response.choices[0].message.content.strip()
34
  except Exception as e:
35
+ chatgpt_reply = f"⚠️ ChatGPT failed: {e}"
36
 
37
+ return chinese_text, chatgpt_reply
38
 
39
  # 🚀 Gradio UI
40
  app = gr.Interface(
41
+ fn=audio_prompt_to_chatgpt,
42
  inputs=gr.Audio(
43
  sources=["microphone", "upload"],
44
  type="filepath",
45
  label="🎙️ Speak Chinese or Upload Audio"
46
  ),
47
  outputs=[
48
+ gr.Textbox(label="🈢 Transcribed Chinese Prompt"),
49
+ gr.Textbox(label="🤖 ChatGPT Response")
50
  ],
51
+ title="🎀 Chinese Voice → ChatGPT Prompt",
52
+ description="Speak Chinese or upload audio. Whisper transcribes, then ChatGPT executes your instructions."
53
  )
54
 
55
  if __name__ == "__main__":