Rishitha3 committed on
Commit
dbd85ec
·
verified ·
1 Parent(s): 0bc80fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -15
app.py CHANGED
@@ -3,13 +3,15 @@ import fitz # PyMuPDF for PDFs
3
  import docx
4
  import faiss
5
  import os
 
6
  from sentence_transformers import SentenceTransformer
7
- from transformers import pipeline
 
8
 
9
  # =============================
10
  # 1. Hugging Face Authentication
11
  # =============================
12
- HF_TOKEN = os.getenv("HF_TOKEN") # export HF_TOKEN="your_token_here"
13
  if HF_TOKEN is None:
14
  raise ValueError("⚠️ Please set your HF_TOKEN as an environment variable.")
15
 
@@ -28,19 +30,21 @@ qa_model = pipeline(
28
  device_map="auto"
29
  )
30
 
31
- # Speech-to-Text (Whisper small, lightweight)
32
  stt_model = pipeline(
33
  "automatic-speech-recognition",
34
  model="openai/whisper-small",
35
  token=HF_TOKEN
36
  )
37
 
38
- # Text-to-Speech (VITS)
39
- tts_model = pipeline(
40
- "text-to-speech",
41
- model="espnet/kan-bayashi_ljspeech_vits",
42
- token=HF_TOKEN
43
- )
 
 
44
 
45
  # =============================
46
  # 3. Helper: extract text from files
@@ -55,7 +59,7 @@ def extract_text(file):
55
  doc = docx.Document(file.name)
56
  for para in doc.paragraphs:
57
  text += para.text + "\n"
58
- else: # fallback: txt
59
  text = file.read().decode("utf-8", errors="ignore")
60
  return text
61
 
@@ -124,17 +128,18 @@ def voice_query(audio):
124
  answer = answer_query(text_query)
125
 
126
  # Step 3: Text-to-Speech
127
- tts_result = tts_model(answer)
128
- return answer, (tts_result["audio"], tts_result["sampling_rate"])
 
 
129
 
130
  # =============================
131
- # 9. Gradio UI (Visually Appealing)
132
  # =============================
133
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="cyan")) as demo:
134
  gr.Markdown("""
135
  # 📚 HyDE RAG Chatbot + 🎤 Voice Assistant
136
- Talk with your documents using **Hypothetical Document Embeddings (HyDE)**.
137
- Upload a PDF/DOCX/TXT and start asking questions by **typing or speaking**!
138
  """)
139
 
140
  with gr.Row():
 
3
  import docx
4
  import faiss
5
  import os
6
+ import torch
7
  from sentence_transformers import SentenceTransformer
8
+ from transformers import pipeline, SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
9
+ from datasets import load_dataset
10
 
11
  # =============================
12
  # 1. Hugging Face Authentication
13
  # =============================
14
+ HF_TOKEN = os.getenv("HF_TOKEN")
15
  if HF_TOKEN is None:
16
  raise ValueError("⚠️ Please set your HF_TOKEN as an environment variable.")
17
 
 
30
  device_map="auto"
31
  )
32
 
33
+ # Speech-to-Text (Whisper)
34
  stt_model = pipeline(
35
  "automatic-speech-recognition",
36
  model="openai/whisper-small",
37
  token=HF_TOKEN
38
  )
39
 
40
+ # Text-to-Speech (SpeechT5)
41
+ processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts", use_auth_token=HF_TOKEN)
42
+ tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts", use_auth_token=HF_TOKEN)
43
+ vocoder = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speech_t5_hifigan", use_auth_token=HF_TOKEN)
44
+
45
+ # Load a fixed speaker embedding (x-vector at index 7306 of the validation split)
46
+ embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
47
+ speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
48
 
49
  # =============================
50
  # 3. Helper: extract text from files
 
59
  doc = docx.Document(file.name)
60
  for para in doc.paragraphs:
61
  text += para.text + "\n"
62
+ else:
63
  text = file.read().decode("utf-8", errors="ignore")
64
  return text
65
 
 
128
  answer = answer_query(text_query)
129
 
130
  # Step 3: Text-to-Speech
131
+ inputs = processor(text=answer, return_tensors="pt")
132
+ with torch.no_grad():
133
+ speech = tts_model.generate_speech(inputs["input_ids"], speaker_embedding, vocoder=vocoder)
134
+ return answer, (speech.numpy(), 16000)
135
 
136
  # =============================
137
+ # 9. Gradio UI
138
  # =============================
139
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="cyan")) as demo:
140
  gr.Markdown("""
141
  # 📚 HyDE RAG Chatbot + 🎤 Voice Assistant
142
+ Upload a PDF/DOCX/TXT and ask questions by typing or speaking!
 
143
  """)
144
 
145
  with gr.Row():