Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,13 +3,15 @@ import fitz # PyMuPDF for PDFs
|
|
| 3 |
import docx
|
| 4 |
import faiss
|
| 5 |
import os
|
|
|
|
| 6 |
from sentence_transformers import SentenceTransformer
|
| 7 |
-
from transformers import pipeline
|
|
|
|
| 8 |
|
| 9 |
# =============================
|
| 10 |
# 1. Hugging Face Authentication
|
| 11 |
# =============================
|
| 12 |
-
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 13 |
if HF_TOKEN is None:
|
| 14 |
raise ValueError("⚠️ Please set your HF_TOKEN as an environment variable.")
|
| 15 |
|
|
@@ -28,19 +30,21 @@ qa_model = pipeline(
|
|
| 28 |
device_map="auto"
|
| 29 |
)
|
| 30 |
|
| 31 |
-
# Speech-to-Text (Whisper
|
| 32 |
stt_model = pipeline(
|
| 33 |
"automatic-speech-recognition",
|
| 34 |
model="openai/whisper-small",
|
| 35 |
token=HF_TOKEN
|
| 36 |
)
|
| 37 |
|
| 38 |
-
# Text-to-Speech (
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
| 44 |
|
| 45 |
# =============================
|
| 46 |
# 3. Helper: extract text from files
|
|
@@ -55,7 +59,7 @@ def extract_text(file):
|
|
| 55 |
doc = docx.Document(file.name)
|
| 56 |
for para in doc.paragraphs:
|
| 57 |
text += para.text + "\n"
|
| 58 |
-
else:
|
| 59 |
text = file.read().decode("utf-8", errors="ignore")
|
| 60 |
return text
|
| 61 |
|
|
@@ -124,17 +128,18 @@ def voice_query(audio):
|
|
| 124 |
answer = answer_query(text_query)
|
| 125 |
|
| 126 |
# Step 3: Text-to-Speech
|
| 127 |
-
|
| 128 |
-
|
|
|
|
|
|
|
| 129 |
|
| 130 |
# =============================
|
| 131 |
-
# 9. Gradio UI
|
| 132 |
# =============================
|
| 133 |
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="cyan")) as demo:
|
| 134 |
gr.Markdown("""
|
| 135 |
# 📚 HyDE RAG Chatbot + 🎤 Voice Assistant
|
| 136 |
-
|
| 137 |
-
Upload a PDF/DOCX/TXT and start asking questions by **typing or speaking**!
|
| 138 |
""")
|
| 139 |
|
| 140 |
with gr.Row():
|
|
|
|
| 3 |
import docx
|
| 4 |
import faiss
|
| 5 |
import os
|
| 6 |
+
import torch
|
| 7 |
from sentence_transformers import SentenceTransformer
|
| 8 |
+
from transformers import pipeline, SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
|
| 9 |
+
from datasets import load_dataset
|
| 10 |
|
| 11 |
# =============================
|
| 12 |
# 1. Hugging Face Authentication
|
| 13 |
# =============================
|
| 14 |
+
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 15 |
if HF_TOKEN is None:
|
| 16 |
raise ValueError("⚠️ Please set your HF_TOKEN as an environment variable.")
|
| 17 |
|
|
|
|
| 30 |
device_map="auto"
|
| 31 |
)
|
| 32 |
|
| 33 |
+
# Speech-to-Text (Whisper)
|
| 34 |
stt_model = pipeline(
|
| 35 |
"automatic-speech-recognition",
|
| 36 |
model="openai/whisper-small",
|
| 37 |
token=HF_TOKEN
|
| 38 |
)
|
| 39 |
|
| 40 |
+
# Text-to-Speech (SpeechT5)
|
| 41 |
+
# Tokenizer/feature processor for SpeechT5; `token=` matches the pipeline calls above
# (`use_auth_token=` is the deprecated spelling of the same argument).
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts", token=HF_TOKEN)
|
| 42 |
+
# Text-to-spectrogram model; `token=` matches the pipeline calls above
# (`use_auth_token=` is the deprecated spelling of the same argument).
tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts", token=HF_TOKEN)
|
| 43 |
+
# The HiFi-GAN vocoder turns the spectrogram from tts_model into a waveform. It has its own
# model class and checkpoint id; loading it through SpeechT5ForTextToSpeech fails at startup.
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan", token=HF_TOKEN)
|
| 44 |
+
|
| 45 |
+
# Load a fixed speaker embedding (entry 7306 of the x-vector dataset) so the voice is the same on every run
|
| 46 |
+
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
|
| 47 |
+
speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
|
| 48 |
|
| 49 |
# =============================
|
| 50 |
# 3. Helper: extract text from files
|
|
|
|
| 59 |
doc = docx.Document(file.name)
|
| 60 |
for para in doc.paragraphs:
|
| 61 |
text += para.text + "\n"
|
| 62 |
+
else:
|
| 63 |
text = file.read().decode("utf-8", errors="ignore")
|
| 64 |
return text
|
| 65 |
|
|
|
|
| 128 |
answer = answer_query(text_query)
|
| 129 |
|
| 130 |
# Step 3: Text-to-Speech
|
| 131 |
+
inputs = processor(text=answer, return_tensors="pt")
|
| 132 |
+
with torch.no_grad():
|
| 133 |
+
speech = tts_model.generate_speech(inputs["input_ids"], speaker_embedding, vocoder=vocoder)
|
| 134 |
+
return answer, (speech.numpy(), 16000)
|
| 135 |
|
| 136 |
# =============================
|
| 137 |
+
# 9. Gradio UI
|
| 138 |
# =============================
|
| 139 |
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="cyan")) as demo:
|
| 140 |
gr.Markdown("""
|
| 141 |
# 📚 HyDE RAG Chatbot + 🎤 Voice Assistant
|
| 142 |
+
Upload a PDF/DOCX/TXT and ask questions by typing or speaking!
|
|
|
|
| 143 |
""")
|
| 144 |
|
| 145 |
with gr.Row():
|