Update app.py
app.py CHANGED

@@ -1,31 +1,17 @@
 import os
-from collections.abc import Iterator
-from threading import Thread
 import gradio as gr
-import spaces
 import torch
-import edge_tts
+import tempfile
 import asyncio
+import edge_tts
+from threading import Thread
+from collections.abc import Iterator
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
 DESCRIPTION = """
-# QwQ Tiny
+# QwQ Tiny with Edge TTS
 """
 
-css ='''
-h1 {
-  text-align: center;
-  display: block;
-}
-
-#duplicate-button {
-  margin: auto;
-  color: #fff;
-  background: #1565c0;
-  border-radius: 100vh;
-}
-'''
-
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
@@ -41,16 +27,14 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 model.eval()
 
-
-async def text_to_speech(text: str, output_file="output.mp3"):
-    """Convert text to speech using Edge TTS and save as MP3"""
-    voice = "en-US-JennyNeural"  # Change this to your preferred voice
-    communicate = edge_tts.Communicate(text, voice)
-    await communicate.save(output_file)
-    return output_file
-
-
-@spaces.GPU
+async def text_to_speech(text: str) -> str:
+    """Converts text to speech using Edge TTS and returns the generated audio file path."""
+    communicate = edge_tts.Communicate(text)
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
+        tmp_path = tmp_file.name
+    await communicate.save(tmp_path)
+    return tmp_path
+
 def generate(
     message: str,
     chat_history: list[dict],
@@ -59,47 +43,55 @@ def generate(
     top_p: float = 0.9,
     top_k: int = 50,
     repetition_penalty: float = 1.2,
-):
-
-    is_tts = message.strip().
-
+) -> Iterator[str] | str:
+
+    is_tts = message.strip().startswith("@tts")
+    is_text_only = message.strip().startswith("@text")
 
-    conversation = [*chat_history, {"role": "user", "content": message}]
+    # Remove special tags
+    if is_tts:
+        message = message.replace("@tts", "").strip()
+    elif is_text_only:
+        message = message.replace("@text", "").strip()
 
+    conversation = [*chat_history, {"role": "user", "content": message}]
     input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
+
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
+
     input_ids = input_ids.to(model.device)
 
     streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
-    generate_kwargs = dict(
-        {"input_ids": input_ids},
-        streamer=streamer,
-        max_new_tokens=max_new_tokens,
-        do_sample=True,
-        top_p=top_p,
-        top_k=top_k,
-        temperature=temperature,
-        num_beams=1,
-        repetition_penalty=repetition_penalty,
-    )
+    generate_kwargs = {
+        "input_ids": input_ids,
+        "streamer": streamer,
+        "max_new_tokens": max_new_tokens,
+        "do_sample": True,
+        "top_p": top_p,
+        "top_k": top_k,
+        "temperature": temperature,
+        "num_beams": 1,
+        "repetition_penalty": repetition_penalty,
+    }
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
 
     outputs = []
     for text in streamer:
         outputs.append(text)
-        yield "".join(outputs)
 
-
+    final_output = "".join(outputs)
 
+    # If TTS requested, generate speech and return audio file
     if is_tts:
-
-
-
-
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        audio_path = loop.run_until_complete(text_to_speech(final_output))
+        return audio_path  # Returning audio file path
 
+    return final_output  # Returning text output
 
 demo = gr.ChatInterface(
     fn=generate,
@@ -113,15 +105,13 @@ demo = gr.ChatInterface(
     stop_btn=None,
     examples=[
        ["A train travels 60 kilometers per hour. If it travels for 5 hours, how far will it travel in total?"],
-        ["
-        ["
-        ["Rewrite the following sentence in passive voice: 'The dog chased the cat.'"],
-        ["@tts What is the capital of France?"],
+        ["@text What causes rainbows to form?"],
+        ["@tts Explain Newton's third law of motion."],
+        ["@text Rewrite the following sentence in passive voice: 'The dog chased the cat.'"],
     ],
    cache_examples=False,
    type="messages",
    description=DESCRIPTION,
    fill_height=True,
 )
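For reference, the new Edge TTS path from this commit can be exercised as a minimal standalone sketch. The helper below mirrors the one added above; the `__main__` driver and sample sentence are illustrative only, and `asyncio.run()` stands in for the manual `new_event_loop()`/`run_until_complete()` pair that `generate()` needs because it runs inside a synchronous Gradio handler. Assumes the `edge-tts` package is installed.

import asyncio
import tempfile

import edge_tts


async def text_to_speech(text: str) -> str:
    # Default voice; Edge TTS streams MP3 bytes into the temp file,
    # so the ".wav" suffix here is only a file name, not the actual format.
    communicate = edge_tts.Communicate(text)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    return tmp_path


if __name__ == "__main__":
    # Illustrative driver: asyncio.run() creates and tears down the event loop,
    # equivalent to the manual loop handling inside generate().
    path = asyncio.run(text_to_speech("Hello from Edge TTS"))
    print("Audio saved to", path)

One trade-off visible in the diff: the previous `generate()` yielded partial text as it streamed, while the new version collects the full response before returning, since a single return value is needed to hand back either the text or the audio file path.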