# Hugging Face Space: voice-to-LLM assistant with RAG over uploaded PDFs
# (Gradio app; pipeline = Whisper STT -> retrieval -> sentiment -> LLM -> Gemini TTS)
| import gradio as gr | |
| from rag import process_pdfs, retrieve_context | |
| from emotion import get_emotion_and_tone | |
| from llm import get_llm_response | |
| from tts_gemini import tts_gemini | |
| import whisper | |
| import time | |
| import csv | |
| import os | |
# One-time module setup: load the Whisper STT model and prepare the
# per-stage latency log (a CSV file with a fixed column header).
whisper_model = whisper.load_model("base")

LOG_DIR = "logs"
LOG_FILE = os.path.join(LOG_DIR, "latency_log.csv")

# Column order must match the row order written in process_audio_with_rag.
_CSV_HEADER = [
    "Whisper STT (s)",
    "Document Retrieval (s)",
    "Sentiment Analysis (s)",
    "Response Gen (LLM) (s)",
    "TTS Synthesis (s)",
    "Total (s)",
]

os.makedirs(LOG_DIR, exist_ok=True)
if not os.path.exists(LOG_FILE):
    # Fresh log file: write the header row exactly once.
    with open(LOG_FILE, mode="w", newline="") as log_fh:
        csv.writer(log_fh).writerow(_CSV_HEADER)
def process_audio_with_rag(audio):
    """Run the full voice pipeline on one recording and log per-stage latency.

    Stages: Whisper STT -> PDF context retrieval -> sentiment/tone analysis
    -> LLM response generation -> Gemini TTS synthesis. Each stage is timed
    and one row of latencies is appended to LOG_FILE.

    Parameters:
        audio: filepath to the recorded audio (Gradio `type="filepath"`),
            or None when the recording was cleared.

    Returns:
        (llm_output, emotion, transcript, context, tts_path) — all strings
        except tts_path, which is the synthesized-audio filepath (or None
        when there was no input).
    """
    # Gradio fires .change with None when the mic recording is cleared;
    # bail out gracefully instead of crashing Whisper on a None path.
    if audio is None:
        return "", "", "", "", None

    # perf_counter() is monotonic and high-resolution — the right clock for
    # latency measurement (time.time() can jump with wall-clock adjustments).
    total_start = time.perf_counter()

    stt_start = time.perf_counter()
    transcription = whisper_model.transcribe(audio)
    text = transcription["text"]
    stt_latency = time.perf_counter() - stt_start

    retrieval_start = time.perf_counter()
    context = retrieve_context(text)
    retrieval_latency = time.perf_counter() - retrieval_start

    sentiment_start = time.perf_counter()
    emotion, tone_instruction = get_emotion_and_tone(text)
    sentiment_latency = time.perf_counter() - sentiment_start

    llm_start = time.perf_counter()
    llm_output = get_llm_response(text, context, emotion, tone_instruction)
    llm_latency = time.perf_counter() - llm_start

    tts_start = time.perf_counter()
    tts_path = tts_gemini(llm_output)
    tts_latency = time.perf_counter() - tts_start

    total_latency = time.perf_counter() - total_start

    # Append one latency row per request (header is written at import time).
    with open(LOG_FILE, mode="a", newline="") as log_fh:
        csv.writer(log_fh).writerow(
            f"{latency:.3f}"
            for latency in (
                stt_latency,
                retrieval_latency,
                sentiment_latency,
                llm_latency,
                tts_latency,
                total_latency,
            )
        )

    return llm_output, emotion, text, context, tts_path
# Gradio UI: a PDF-upload row feeding the RAG indexer, a microphone input,
# and one output widget per pipeline artifact.
with gr.Blocks() as demo:
    gr.Markdown("""
    # Voice to LLM & Sentiment Analyzer with RAG (PDF Upload & Gemini TTS)
    """)

    # PDF ingestion: uploaded files are indexed immediately for retrieval.
    with gr.Row():
        pdf_input = gr.Files(label="Upload PDF(s)", type="filepath")
        pdf_status = gr.Textbox(label="PDF Processing Status")
    pdf_input.upload(process_pdfs, inputs=pdf_input, outputs=pdf_status)

    # Voice query input followed by the pipeline's outputs.
    audio_input = gr.Audio(
        sources=["microphone"],
        type="filepath",
        label="Speak your query",
    )
    llm_output = gr.Textbox(label="LLM Output")
    sentiment_output = gr.Textbox(label="Sentiment")
    transcript_output = gr.Textbox(label="Transcribed Text")
    context_output = gr.Textbox(label="Retrieved Context from PDFs")
    tts_output = gr.Audio(label="LLM Output (Gemini TTS)")

    # Re-run the whole pipeline whenever a new recording arrives.
    audio_input.change(
        process_audio_with_rag,
        inputs=audio_input,
        outputs=[
            llm_output,
            sentiment_output,
            transcript_output,
            context_output,
            tts_output,
        ],
    )