core-OCR

Paused

App Files Files Community

prithivMLmods committed on Feb 8

Commit

ef0f895

verified ·

1 Parent(s): 90c972a

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -13

app.py CHANGED Viewed

@@ -4,13 +4,13 @@ import torch
 import tempfile
 import asyncio
 import edge_tts
-import spaces
 from threading import Thread
 from collections.abc import Iterator
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 DESCRIPTION = """
-# QwQ Tiny with Edge TTS
 """
 MAX_MAX_NEW_TOKENS = 2048
@@ -29,14 +29,23 @@ model = AutoModelForCausalLM.from_pretrained(
 model.eval()
 async def text_to_speech(text: str) -> str:
-    """Converts text to speech using Edge TTS and returns the generated audio file path."""
     communicate = edge_tts.Communicate(text)
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
-        tmp_path = tmp_file.name
-        await communicate.save(tmp_path)
-    return tmp_path
-@spaces.GPU
 def generate(
     message: str,
     chat_history: list[dict],
@@ -47,12 +56,12 @@ def generate(
     repetition_penalty: float = 1.2,
 ) -> Iterator[str] | str:
-    is_tts = message.strip().startswith("@tts")
     is_text_only = message.strip().startswith("@text")
     # Remove special tags
     if is_tts:
-        message = message.replace("@tts", "").strip()
     elif is_text_only:
         message = message.replace("@text", "").strip()
@@ -91,7 +100,7 @@ def generate(
         loop = asyncio.new_event_loop()
         asyncio.set_event_loop(loop)
         audio_path = loop.run_until_complete(text_to_speech(final_output))
-        return audio_path  # Returning audio file path
     return final_output  # Returning text output
@@ -107,8 +116,8 @@ demo = gr.ChatInterface(
     stop_btn=None,
     examples=[
         ["A train travels 60 kilometers per hour. If it travels for 5 hours, how far will it travel in total?"],
-        ["@text What causes rainbows to form?"],
-        ["@tts Explain Newton's third law of motion."],
         ["@text Rewrite the following sentence in passive voice: 'The dog chased the cat.'"],
     ],
     cache_examples=False,

 import tempfile
 import asyncio
 import edge_tts
+from pydub import AudioSegment
 from threading import Thread
 from collections.abc import Iterator
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 DESCRIPTION = """
+# QwQ Tiny with Edge TTS (MP3 Output)
 """
 MAX_MAX_NEW_TOKENS = 2048
 model.eval()
 async def text_to_speech(text: str) -> str:
     """Synthesize *text* with Edge TTS and return the path of the MP3 file.

     Edge TTS emits MP3 audio with its default output format, so the stream
     is written straight to a ``.mp3`` temporary file. Saving it under a
     ``.wav`` name and re-decoding it as WAV is unnecessary and can fail,
     because the bytes on disk are not WAV data.

     Args:
         text: The text to convert to speech.

     Returns:
         Path of a temporary ``.mp3`` file. The file is created with
         ``delete=False``, so it is not removed automatically.
     """
     # Reserve a unique path and close the handle before Edge TTS writes to it.
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_mp3:
         mp3_path = tmp_mp3.name
     communicate = edge_tts.Communicate(text)
     await communicate.save(mp3_path)
     return mp3_path  # Return the MP3 file path
 def generate(
     message: str,
     chat_history: list[dict],
     repetition_penalty: float = 1.2,
 ) -> Iterator[str] | str:
+    is_tts = message.strip().startswith("edgetts@tts")
     is_text_only = message.strip().startswith("@text")
     # Remove special tags
     if is_tts:
+        message = message.replace("edgetts@tts", "").strip()
     elif is_text_only:
         message = message.replace("@text", "").strip()
         loop = asyncio.new_event_loop()
         asyncio.set_event_loop(loop)
         audio_path = loop.run_until_complete(text_to_speech(final_output))
+        return audio_path  # Returning MP3 file path
     return final_output  # Returning text output
     stop_btn=None,
     examples=[
         ["A train travels 60 kilometers per hour. If it travels for 5 hours, how far will it travel in total?"],
+        ["@text What is AI?"],
+        ["edgetts@tts Explain Newton's third law of motion."],
         ["@text Rewrite the following sentence in passive voice: 'The dog chased the cat.'"],
     ],
     cache_examples=False,