Emeritus-21 committed on
Commit
240bc09
·
verified ·
1 Parent(s): 51c1217

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -36
app.py CHANGED
@@ -1,26 +1,25 @@
1
  import os
2
  import json
 
 
 
3
  from dotenv import load_dotenv
4
  from pypdf import PdfReader
5
  import gradio as gr
6
- from elevenlabs.client import ElevenLabs
7
  from huggingface_hub import InferenceClient
8
  from spaces import GPU
9
 
 
 
 
10
  # Load environment keys
11
  load_dotenv()
12
- ELEVEN_API_KEY = os.getenv("ELEVEN_API_KEY")
13
  HF_TOKEN = os.getenv("HF_TOKEN") # Automatically set in Spaces
14
 
15
- # Initialize Clients
16
  if HF_TOKEN:
17
- # We use the 72B Instruct model for best reasoning/JSON performance
18
  hf_client = InferenceClient(model="Qwen/Qwen2.5-72B-Instruct", token=HF_TOKEN)
19
 
20
- client = None
21
- if ELEVEN_API_KEY:
22
- client = ElevenLabs(api_key=ELEVEN_API_KEY)
23
-
24
  # =========================
25
  # HELPER FUNCTIONS
26
  # =========================
@@ -36,6 +35,12 @@ def extract_text_from_pdf(pdf):
36
  except Exception as e:
37
  return f"Error reading PDF: {e}"
38
 
 
 
 
 
 
 
39
  # =========================
40
  # CORE LOGIC
41
  # =========================
@@ -52,8 +57,8 @@ def generate_script(pdf_file, persona_style):
52
  if not pdf_file:
53
  return "⚠️ Upload a PDF first.", None, new_state
54
 
55
- if not HF_TOKEN or not ELEVEN_API_KEY:
56
- return "⚠️ Missing Keys (HF_TOKEN or ELEVEN_API_KEY).", None, new_state
57
 
58
  pdf_text = extract_text_from_pdf(pdf_file)
59
  new_state["full_text"] = pdf_text
@@ -63,7 +68,7 @@ def generate_script(pdf_file, persona_style):
63
  "Gossip Columnist": "You are a gossip columnist host. Tone: Dramatic, sensationalist, and excited.",
64
  }
65
 
66
- # Qwen System Prompt setup
67
  system_instruction = f"""
68
  {prompts.get(persona_style)}
69
 
@@ -105,8 +110,8 @@ def generate_script(pdf_file, persona_style):
105
  except Exception as e:
106
  return f"Error with Qwen: {e}", None, new_state
107
 
108
- @GPU
109
- def play_next_chunk(state_data):
110
  if not state_data or not state_data.get("script"):
111
  return None, "⚠️ No script generated yet.", state_data
112
 
@@ -118,21 +123,20 @@ def play_next_chunk(state_data):
118
 
119
  line = script[idx]
120
 
121
- voice_id = "nPczCjz82tPNOwVbpGE2" # Default voice
122
- if state_data["persona"] == "Gossip Columnist" and line["speaker"] == "Host B":
123
- voice_id = "21m00Tcm4TlvDq8ikWAM"
 
 
 
 
 
 
 
124
 
125
  try:
126
- audio_stream = client.generate(
127
- text=line["text"],
128
- voice=voice_id,
129
- model="eleven_monolingual_v1"
130
- )
131
-
132
  save_path = f"temp_{idx}.mp3"
133
- with open(save_path, "wb") as f:
134
- for chunk in audio_stream:
135
- f.write(chunk)
136
 
137
  state_data["current_index"] += 1
138
  return save_path, f"{line['speaker']}: {line['text']}", state_data
@@ -140,8 +144,7 @@ def play_next_chunk(state_data):
140
  except Exception as e:
141
  return None, f"Audio error: {e}", state_data
142
 
143
- @GPU
144
- def interrupt_and_ask(question, state_data):
145
  if not state_data or not state_data.get("full_text"):
146
  return None, "Upload PDF first.", state_data
147
 
@@ -159,16 +162,8 @@ def interrupt_and_ask(question, state_data):
159
  return None, f"Qwen Error: {e}", state_data
160
 
161
  try:
162
- audio_stream = client.generate(
163
- text=answer,
164
- voice="nPczCjz82tPNOwVbpGE2",
165
- model="eleven_monolingual_v1"
166
- )
167
-
168
  save_path = "interrupt.mp3"
169
- with open(save_path, "wb") as f:
170
- for chunk in audio_stream:
171
- f.write(chunk)
172
 
173
  return save_path, answer, state_data
174
  except Exception as e:
 
1
  import os
2
  import json
3
+ import asyncio
4
+ import nest_asyncio
5
+ import edge_tts
6
  from dotenv import load_dotenv
7
  from pypdf import PdfReader
8
  import gradio as gr
 
9
  from huggingface_hub import InferenceClient
10
  from spaces import GPU
11
 
12
+ # Allow async loops in Gradio
13
+ nest_asyncio.apply()
14
+
15
  # Load environment keys
16
  load_dotenv()
 
17
  HF_TOKEN = os.getenv("HF_TOKEN") # Automatically set in Spaces
18
 
19
+ # Initialize Client (Qwen 72B)
20
  if HF_TOKEN:
 
21
  hf_client = InferenceClient(model="Qwen/Qwen2.5-72B-Instruct", token=HF_TOKEN)
22
 
 
 
 
 
23
  # =========================
24
  # HELPER FUNCTIONS
25
  # =========================
 
35
  except Exception as e:
36
  return f"Error reading PDF: {e}"
37
 
38
async def generate_audio_file(text, voice, output_path):
    """Synthesize *text* with the given Edge TTS voice and save it to *output_path*.

    Returns the path that was written, so callers can hand it straight to the UI.
    """
    tts = edge_tts.Communicate(text, voice)
    await tts.save(output_path)
    return output_path
43
+
44
  # =========================
45
  # CORE LOGIC
46
  # =========================
 
57
  if not pdf_file:
58
  return "⚠️ Upload a PDF first.", None, new_state
59
 
60
+ if not HF_TOKEN:
61
+ return "⚠️ Missing HF_TOKEN. This usually works automatically in Spaces.", None, new_state
62
 
63
  pdf_text = extract_text_from_pdf(pdf_file)
64
  new_state["full_text"] = pdf_text
 
68
  "Gossip Columnist": "You are a gossip columnist host. Tone: Dramatic, sensationalist, and excited.",
69
  }
70
 
71
+ # Qwen System Prompt
72
  system_instruction = f"""
73
  {prompts.get(persona_style)}
74
 
 
110
  except Exception as e:
111
  return f"Error with Qwen: {e}", None, new_state
112
 
113
+ # We use async here for Edge TTS
114
+ async def play_next_chunk(state_data):
115
  if not state_data or not state_data.get("script"):
116
  return None, "⚠️ No script generated yet.", state_data
117
 
 
123
 
124
  line = script[idx]
125
 
126
+ # SELECT VOICES (Free Edge TTS)
127
+ # Host A = Male, Host B = Female
128
+ voice_id = "en-US-ChristopherNeural"
129
+
130
+ if line["speaker"] == "Host B":
131
+ voice_id = "en-US-AriaNeural"
132
+
133
+ # Switch voices for Gossip mode
134
+ if state_data["persona"] == "Gossip Columnist":
135
+ voice_id = "en-US-EricNeural" if line["speaker"] == "Host A" else "en-US-AnaNeural"
136
 
137
  try:
 
 
 
 
 
 
138
  save_path = f"temp_{idx}.mp3"
139
+ await generate_audio_file(line["text"], voice_id, save_path)
 
 
140
 
141
  state_data["current_index"] += 1
142
  return save_path, f"{line['speaker']}: {line['text']}", state_data
 
144
  except Exception as e:
145
  return None, f"Audio error: {e}", state_data
146
 
147
+ async def interrupt_and_ask(question, state_data):
 
148
  if not state_data or not state_data.get("full_text"):
149
  return None, "Upload PDF first.", state_data
150
 
 
162
  return None, f"Qwen Error: {e}", state_data
163
 
164
  try:
 
 
 
 
 
 
165
  save_path = "interrupt.mp3"
166
+ await generate_audio_file(answer, "en-US-ChristopherNeural", save_path)
 
 
167
 
168
  return save_path, answer, state_data
169
  except Exception as e: