ai-tomoni committed on
Commit
5f63e76
·
verified ·
1 Parent(s): a977820

Create app_with_audio.py

Browse files
Files changed (1) hide show
  1. app_with_audio.py +84 -0
app_with_audio.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from huggingface_hub import InferenceClient
3
+ import os
4
+ import whisper
5
+ from gtts import gTTS
6
+ import time
7
+
8
# --- Configuration ---------------------------------------------------------

# Hugging Face access token, read from the "tomoniaccess" environment variable.
HF_TOKEN = os.getenv("tomoniaccess")

# Hosted inference client for the Mistral-7B instruct model.
client = InferenceClient(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    token=HF_TOKEN,
)

# Local Whisper "base" checkpoint, used below for German speech-to-text.
whisper_model = whisper.load_model("base")

# System prompt (German): empathetic-supporter persona for people with
# depression — validate feelings, offer small concrete help, never diagnose,
# and gently refer to professional help when needed.
SYSTEM_MESSAGE = (
    "Du bist ein einfühlsamer Unterstützer für Menschen mit Depressionen. "
    "Sprich sanft, validiere ihre Gefühle und biete kleine, konkrete Hilfestellungen an. "
    "Mach keine Diagnosen und verweise bei Bedarf freundlich auf professionelle Hilfe."
)
24
def full_pipeline(audio_path, max_tokens, temperature, top_p):
    """Transcribe German speech, generate an empathetic reply, and voice it.

    Args:
        audio_path: Path to the recorded input audio, or None/"" when the
            user submitted without recording anything.
        max_tokens: Maximum number of tokens the chat model may generate.
        temperature: Sampling temperature for the chat model.
        top_p: Nucleus-sampling probability mass for the chat model.

    Returns:
        Tuple of (transcribed user text, bot reply text, path to reply MP3
        or None when no audio reply was produced).
    """
    import tempfile  # local import: only needed for the TTS output file

    t0 = time.time()

    # Gradio passes None when the user hits submit without a recording;
    # Whisper would crash on it, so answer with a gentle prompt instead.
    if not audio_path:
        return "", "Bitte zuerst eine Aufnahme machen.", None

    # 1. Speech-to-text with Whisper, forced to German.
    t1 = time.time()
    result = whisper_model.transcribe(audio_path, language="de")
    user_input = result["text"].strip()
    t2 = time.time()
    print(f"⏱️ Transcription took {t2 - t1:.2f} sec")

    # 2. Streaming chat completion against the hosted Mistral model.
    messages = [
        {"role": "system", "content": SYSTEM_MESSAGE},
        {"role": "user", "content": user_input},
    ]
    response_text = ""
    t3 = time.time()
    for message in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = message.choices[0].delta.content
        if token:
            response_text += token
    t4 = time.time()
    print(f"🤖 Mistral response took {t4 - t3:.2f} sec")

    # 3. Text-to-speech. A unique temp file replaces the fixed
    # "response.mp3" so concurrent requests cannot overwrite each other.
    # Skip TTS entirely on an empty reply — gTTS raises on empty text.
    audio_output_path = None
    if response_text.strip():
        fd, audio_output_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)  # gTTS writes by path; release the open descriptor
        tts = gTTS(response_text, lang="de")
        tts.save(audio_output_path)
    t5 = time.time()
    print(f"🔊 TTS took {t5 - t4:.2f} sec")

    print(f"✅ Total processing time: {t5 - t0:.2f} sec")

    return user_input, response_text, audio_output_path
64
+
65
# ---------------------------------------------------------------------------
# Gradio UI: microphone in; transcript, reply text, and spoken reply out.
# ---------------------------------------------------------------------------

# NOTE(review): gr.Audio(source=...) is the Gradio 3.x keyword; Gradio 4.x
# renamed it to sources=["microphone"] — confirm against the pinned version.
_inputs = [
    gr.Audio(source="microphone", type="filepath", label="Sprich hier"),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max neue Tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperatur"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
]

_outputs = [
    gr.Textbox(label="Dein gesprochener Input"),
    gr.Textbox(label="Antwort des Bots"),
    gr.Audio(type="filepath", label="Antwort als Audio"),
]

demo = gr.Interface(
    fn=full_pipeline,
    inputs=_inputs,
    outputs=_outputs,
    title="Einfühlsamer Chatbot für emotionale Unterstützung",
    description="Sprich ins Mikrofon. Der Bot antwortet auf Deutsch, einfühlsam und unterstützend."
)

if __name__ == "__main__":
    demo.launch()