Subayyal committed on
Commit
80c64db
·
verified ·
1 Parent(s): 54e6287

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -2
app.py CHANGED
@@ -4,6 +4,7 @@ from groq import Groq
4
  from pathlib import Path
5
  from pydub import AudioSegment
6
 
 
7
  # Fetch API key from Secrets
8
  api_key = os.environ.get("GrokAPI")
9
  if not api_key:
@@ -12,16 +13,27 @@ if not api_key:
12
 
13
  client = Groq(api_key=api_key)
14
 
 
15
  st.title("🎀 Audio → AI Text → Speech")
16
 
17
  audio_file = st.file_uploader("Upload audio", type=["wav", "m4a"])
18
 
 
 
 
 
 
 
19
  if audio_file:
20
  try:
 
 
21
  audio_path = Path("input.wav")
22
  audio_segment = AudioSegment.from_file(audio_file)
23
  audio_segment.export(audio_path, format="wav")
24
 
 
 
25
  transcription = client.audio.transcriptions.create(
26
  file=("input.wav", audio_path.read_bytes()),
27
  model="whisper-large-v3",
@@ -29,21 +41,29 @@ if audio_file:
29
  )
30
  st.text_area("📝 Question", transcription, height=150)
31
 
 
 
32
  completion = client.chat.completions.create(
33
  model="llama-3.1-8b-instant",
34
  messages=[{"role": "user", "content": transcription}],
35
  temperature=0.7,
36
- max_completion_tokens=512,
37
  )
38
  answer_text = completion.choices[0].message.content
39
  st.text_area("💬 AI Answer", answer_text, height=200)
40
 
 
 
 
 
 
 
41
  speech_path = Path("answer.wav")
42
  response = client.audio.speech.create(
43
  model="playai-tts",
44
  voice="Aaliyah-PlayAI",
45
  response_format="wav",
46
- input=answer_text
47
  )
48
  response.stream_to_file(speech_path)
49
  st.audio(str(speech_path), format="audio/wav")
 
4
  from pathlib import Path
5
  from pydub import AudioSegment
6
 
7
+ # ------------------------------
8
  # Fetch API key from Secrets
9
  api_key = os.environ.get("GrokAPI")
10
  if not api_key:
 
13
 
14
  client = Groq(api_key=api_key)
15
 
16
+ # ------------------------------
17
  st.title("🎀 Audio → AI Text → Speech")
18
 
19
  audio_file = st.file_uploader("Upload audio", type=["wav", "m4a"])
20
 
21
def truncate_text(text, max_chars=1000):
    """Cap *text* at ``max_chars`` characters before it is sent to TTS.

    Text that already fits is returned unchanged. Longer text is cut and a
    short truncation notice is appended; the cut leaves room for the notice,
    so the returned string never exceeds ``max_chars``.

    Args:
        text: Answer text to limit. ``None`` or an empty string yields ``""``.
        max_chars: Maximum length of the returned string, in characters.

    Returns:
        The original text when it fits, otherwise a shortened copy that ends
        with the truncation notice (or a plain hard cut when ``max_chars`` is
        too small to hold the notice).
    """
    notice = "\n\n⚠️ Answer truncated for TTS."

    if not text:
        return ""
    if len(text) <= max_chars:
        return text

    # Reserve space for the notice so the cap applies to the whole result,
    # not just to the part copied from the input.
    budget = max_chars - len(notice)
    if budget <= 0:
        # No room for the notice: a hard cut is the only way to honor the cap.
        return text[:max(max_chars, 0)]

    # NOTE(review): the notice is part of the returned string, so a caller
    # that forwards the result to a speech endpoint will have the notice
    # synthesized as well -- confirm that is intended.
    return text[:budget] + notice
26
+
27
  if audio_file:
28
  try:
29
+ # ------------------------------
30
+ # Convert to WAV
31
  audio_path = Path("input.wav")
32
  audio_segment = AudioSegment.from_file(audio_file)
33
  audio_segment.export(audio_path, format="wav")
34
 
35
+ # ------------------------------
36
+ # Transcribe audio
37
  transcription = client.audio.transcriptions.create(
38
  file=("input.wav", audio_path.read_bytes()),
39
  model="whisper-large-v3",
 
41
  )
42
  st.text_area("📝 Question", transcription, height=150)
43
 
44
+ # ------------------------------
45
+ # Generate AI answer (shorter)
46
  completion = client.chat.completions.create(
47
  model="llama-3.1-8b-instant",
48
  messages=[{"role": "user", "content": transcription}],
49
  temperature=0.7,
50
+ max_completion_tokens=150, # short answer
51
  )
52
  answer_text = completion.choices[0].message.content
53
  st.text_area("💬 AI Answer", answer_text, height=200)
54
 
55
+ # ------------------------------
56
+ # Truncate answer to safe length for TTS
57
+ answer_text_limited = truncate_text(answer_text, max_chars=1000)
58
+
59
+ # ------------------------------
60
+ # Convert text → speech
61
  speech_path = Path("answer.wav")
62
  response = client.audio.speech.create(
63
  model="playai-tts",
64
  voice="Aaliyah-PlayAI",
65
  response_format="wav",
66
+ input=answer_text_limited
67
  )
68
  response.stream_to_file(speech_path)
69
  st.audio(str(speech_path), format="audio/wav")