Spaces: Runtime error
Commit: This works well
app.py CHANGED
@@ -25,8 +25,26 @@ client = InferenceClient(
     token=os.getenv("HF_TOKEN"),
 )
 
+
+def generate_podcast_text(subject: str) -> str:
+    """Ask the LLM for a script of a podcast given by two hosts."""
+    prompt = f"""Generate the script of "Open Paper review", a podcast told by 2 hosts about {subject}.
+The podcast should be an insightful discussion, with some amount of playful banter.
+Separate dialog as follows, using [S1] for the male host and [S2] for the female host, for instance:
+[S1] Hello, how are you?
+[S2] I'm good, thank you. How are you?
+[S1] I'm good, thank you.
+[S2] Great.
+Now go on, make 5 minutes of podcast.
+"""
+    response = client.chat_completion(
+        [{"role": "user", "content": prompt}],
+        max_tokens=8156,
+    )
+    return response.choices[0].message.content
+
 # -----------------------------------------------------------------------------
-# Kokoro TTS
+# Kokoro TTS
 # -----------------------------------------------------------------------------
 CUDA_AVAILABLE = torch.cuda.is_available()
 
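Note: the prompt above pins the [S1]/[S2] speaker-tag convention that process_audio_chunks later relies on to "pick voice via tag". A rough, self-contained sketch of how such a tagged script can be split into (voice, line) pairs; the helper and the [S1] voice name are illustrative assumptions, only "af_heart" is confirmed as the [S2] voice by this diff:

```python
import re

# Hypothetical tag-to-voice mapping: the diff confirms FEMALE_VOICE =
# "af_heart" for [S2]; the [S1] voice name here is a placeholder guess.
TAG_TO_VOICE = {"S1": "am_michael", "S2": "af_heart"}

def split_dialog(script: str) -> list[tuple[str, str]]:
    """Turn a [S1]/[S2]-tagged script into (voice, line) pairs."""
    pairs = []
    for raw in script.splitlines():
        m = re.match(r"\[(S[12])\]\s*(.+)", raw.strip())
        if m:
            pairs.append((TAG_TO_VOICE[m.group(1)], m.group(2)))
    return pairs

# split_dialog("[S1] Hello!\n[S2] Hi there.")
# -> [("am_michael", "Hello!"), ("af_heart", "Hi there.")]
```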
@@ -40,29 +58,13 @@ FEMALE_VOICE = "af_heart" # [S2]
 for v in (MALE_VOICE, FEMALE_VOICE):
     kpipeline.load_voice(v)
 
+# -----------------------------------------------------------------------------
+# Audio generation system with queue
+# -----------------------------------------------------------------------------
 
 audio_queue: queue.Queue[tuple[int, np.ndarray] | None] = queue.Queue()
 stop_signal = threading.Event()
 
-
-
-def generate_podcast_text(subject: str) -> str:
-    """Ask the LLM for a ~5‑minute two‑host script."""
-    prompt = f"""Generate a podcast told by 2 hosts about {subject}.
-The podcast should be an insightful discussion, with some amount of playful banter.
-Separate dialog as follows using [S1] for the male host and [S2] for the female host, for instance:
-[S1] Hello, how are you?
-[S2] I'm good, thank you. How are you?
-[S1] I'm good, thank you. (laughs)
-[S2] Great.
-Now go on, make 5 minutes of podcast.
-"""
-    response = client.chat_completion(
-        [{"role": "user", "content": prompt}],
-        max_tokens=1000,
-    )
-    return response.choices[0].message.content
-
 @spaces.GPU
 def process_audio_chunks(podcast_text: str, speed: float = 1.0) -> None:
     """Read each line, pick voice via tag, send chunks to the queue."""
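Note: the new comment block labels a classic producer/consumer hand-off: the @spaces.GPU worker pushes (sample_rate, samples) tuples into audio_queue, a None sentinel marks the end of the stream, and stop_signal allows early cancellation. A minimal runnable sketch of that pattern, with a hypothetical producer standing in for the Kokoro model:

```python
import queue
import threading

# Names mirror app.py; the demo producer below is hypothetical.
audio_queue: queue.Queue = queue.Queue()
stop_signal = threading.Event()

def producer():
    for _ in range(3):
        if stop_signal.is_set():
            break  # early cancellation requested by the UI
        audio_queue.put((24000, [0.0] * 24000))  # one second of silence
    audio_queue.put(None)  # sentinel: tells the consumer to stop

def consumer():
    while True:
        chunk = audio_queue.get()
        if chunk is None:
            break
        sr, data = chunk
        print(f"got {len(data)} samples at {sr} Hz")

t = threading.Thread(target=producer)
t.start()
consumer()
t.join()
```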
@@ -96,7 +98,6 @@ def process_audio_chunks(podcast_text: str, speed: float = 1.0) -> None:
         audio = kmodel(ps, ref_s, speed)
         audio_queue.put((24000, audio.numpy()))
         audio_numpy = audio.numpy()
-        print("GENERATED AUDIO", audio_numpy[-100:], audio_numpy.max())
         if first:
             first = False
             audio_queue.put((24000, torch.zeros(1).numpy()))
@@ -111,14 +112,12 @@ def stream_audio_generator(podcast_text: str):
         chunk = audio_queue.get()
         if chunk is None:
             break
-        print("CHUNK", chunk, type(chunk))
         sr, data = chunk
 
         buf = io.BytesIO()
         sf.write(buf, data, sr, format="wav")
         buf.seek(0)
-        yield buf.getvalue()
-
+        yield buf.getvalue(), "Generating podcast..."
 
 
 def stop_generation():
@@ -155,7 +154,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
     generate_btn.click(fn=generate_podcast, outputs=podcast_output)
 
-    start_audio_btn.click(fn=stream_audio_generator, inputs=podcast_output, outputs=audio_output)
+    start_audio_btn.click(fn=stream_audio_generator, inputs=podcast_output, outputs=[audio_output, status_text])
     stop_btn.click(fn=stop_generation, outputs=status_text)
 
 if __name__ == "__main__":
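Note: the last two hunks belong together. A Gradio generator wired to several outputs must yield one value per output component on each step, so stream_audio_generator now yields (audio, status) pairs to match outputs=[audio_output, status_text]. A stripped-down sketch of that contract; the component layout is an assumption, only the handler wiring mirrors the diff:

```python
import numpy as np
import gradio as gr

def fake_stream(script: str):
    # Stand-in for stream_audio_generator: each yield carries one value
    # per output component: (audio chunk, status message).
    sr = 24000
    for i in range(3):
        chunk = np.zeros(sr, dtype=np.float32)  # one second of silence
        yield (sr, chunk), f"Generating podcast... ({i + 1}/3)"

with gr.Blocks() as demo:
    script_box = gr.Textbox(label="Podcast script")
    audio_output = gr.Audio(streaming=True, autoplay=True)
    status_text = gr.Textbox(label="Status")
    start_audio_btn = gr.Button("Generate podcast audio")
    start_audio_btn.click(fn=fake_stream, inputs=script_box,
                          outputs=[audio_output, status_text])

if __name__ == "__main__":
    demo.launch()
```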