Spaces:

Rishitha3
/

HyDE

Runtime error

App Files Files Community

Rishitha3 committed on Aug 30, 2025

Commit

dbd85ec

verified ·

1 Parent(s): 0bc80fc

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -15

app.py CHANGED Viewed

@@ -3,13 +3,15 @@ import fitz  # PyMuPDF for PDFs
 import docx
 import faiss
 import os
 from sentence_transformers import SentenceTransformer
-from transformers import pipeline
 # =============================
 # 1. Hugging Face Authentication
 # =============================
-HF_TOKEN = os.getenv("HF_TOKEN")  # export HF_TOKEN="your_token_here"
 if HF_TOKEN is None:
     raise ValueError("⚠️ Please set your HF_TOKEN as an environment variable.")
@@ -28,19 +30,21 @@ qa_model = pipeline(
     device_map="auto"
 )
-# Speech-to-Text (Whisper small, lightweight)
 stt_model = pipeline(
     "automatic-speech-recognition",
     model="openai/whisper-small",
     token=HF_TOKEN
 )
-# Text-to-Speech (VITS)
-tts_model = pipeline(
-    "text-to-speech",
-    model="espnet/kan-bayashi_ljspeech_vits",
-    token=HF_TOKEN
-)
 # =============================
 # 3. Helper: extract text from files
@@ -55,7 +59,7 @@ def extract_text(file):
         doc = docx.Document(file.name)
         for para in doc.paragraphs:
             text += para.text + "\n"
-    else:  # fallback: txt
         text = file.read().decode("utf-8", errors="ignore")
     return text
@@ -124,17 +128,18 @@ def voice_query(audio):
     answer = answer_query(text_query)
     # Step 3: Text-to-Speech
-    tts_result = tts_model(answer)
-    return answer, (tts_result["audio"], tts_result["sampling_rate"])
 # =============================
-# 9. Gradio UI (Visually Appealing)
 # =============================
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="cyan")) as demo:
     gr.Markdown("""
     # 📚 HyDE RAG Chatbot + 🎤 Voice Assistant
-    Talk with your documents using **Hypothetical Document Embeddings (HyDE)**.
-    Upload a PDF/DOCX/TXT and start asking questions by **typing or speaking**!
     """)
     with gr.Row():

 import docx
 import faiss
 import os
+import torch
 from sentence_transformers import SentenceTransformer
+from transformers import pipeline, SpeechT5Processor, SpeechT5ForTextToSpeech
+from datasets import load_dataset
 # =============================
 # 1. Hugging Face Authentication
 # =============================
+HF_TOKEN = os.getenv("HF_TOKEN")
 if HF_TOKEN is None:
     raise ValueError("⚠️ Please set your HF_TOKEN as an environment variable.")
     device_map="auto"
 )
+# Speech-to-Text (Whisper)
 stt_model = pipeline(
     "automatic-speech-recognition",
     model="openai/whisper-small",
     token=HF_TOKEN
 )
+# Text-to-Speech (SpeechT5)
+# The vocoder is a separate HiFi-GAN model with its own class and repo id;
+# loading it through SpeechT5ForTextToSpeech (or with the misspelled repo
+# "microsoft/speech_t5_hifigan") fails at startup.
+from transformers import SpeechT5HifiGan
+# `token=` matches the pipeline(...) calls in this file; `use_auth_token=` is deprecated.
+processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts", token=HF_TOKEN)
+tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts", token=HF_TOKEN)
+vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan", token=HF_TOKEN)
+# Load a fixed speaker embedding (x-vector at index 7306 of the validation split);
+# unsqueeze(0) adds the leading batch dimension passed to generate_speech.
+embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
+speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
 # =============================
 # 3. Helper: extract text from files
         doc = docx.Document(file.name)
         for para in doc.paragraphs:
             text += para.text + "\n"
+    else:
         text = file.read().decode("utf-8", errors="ignore")
     return text
     answer = answer_query(text_query)
     # Step 3: Text-to-Speech
+    inputs = processor(text=answer, return_tensors="pt")
+    with torch.no_grad():
+        speech = tts_model.generate_speech(inputs["input_ids"], speaker_embedding, vocoder=vocoder)
+    return answer, (speech.numpy(), 16000)
 # =============================
+# 9. Gradio UI
 # =============================
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="cyan")) as demo:
     gr.Markdown("""
     # 📚 HyDE RAG Chatbot + 🎤 Voice Assistant
+    Upload a PDF/DOCX/TXT and ask questions by typing or speaking!
     """)
     with gr.Row():