Spaces: Runtime error

update app.py #2
by TrishaThanmai - opened

app.py CHANGED
@@ -1,89 +1,101 @@
 import os
 import gradio as gr
-import fitz
+import fitz
 import docx
 import faiss
 import numpy as np
 import torch
+
 from sentence_transformers import SentenceTransformer
-from transformers import AutoTokenizer,
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 from gtts import gTTS
 from huggingface_hub import login

 # =============================
-# 1)
+# 1) Config
 # =============================
 HF_TOKEN = os.getenv("HF_TOKEN")
-if HF_TOKEN
-    raise ValueError("
+if not HF_TOKEN:
+    raise ValueError("Please set HF_TOKEN in Space secrets")
+
+login(HF_TOKEN)

 EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
-LLM_MODEL_ID = "
+LLM_MODEL_ID = "google/flan-t5-base"
 ASR_MODEL_ID = "openai/whisper-small"

 # =============================
-# 2) Load Models
+# 2) Load Models (cached)
 # =============================
 embedding_model = SentenceTransformer(EMBED_MODEL_ID)

-
-
-
-
-
-
+tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_ID)
+llm = AutoModelForSeq2SeqLM.from_pretrained(LLM_MODEL_ID)
+
+stt_model = pipeline(
+    "automatic-speech-recognition",
+    model=ASR_MODEL_ID,
     token=HF_TOKEN
 )

-# Whisper (speech-to-text)
-stt_model = pipeline("automatic-speech-recognition", model=ASR_MODEL_ID, token=HF_TOKEN)
-
 # =============================
-# 3)
+# 3) Text Extraction
 # =============================
 def extract_text(file_path: str) -> str:
     if not file_path:
         return ""

     text = ""
-
-
-
-
-
-
-
-
-
-
-
-
+    ext = os.path.splitext(file_path)[1].lower()
+
+    try:
+        if ext == ".pdf":
+            doc = fitz.open(file_path)
+            for page in doc:
+                text += page.get_text()
+        elif ext == ".docx":
+            doc = docx.Document(file_path)
+            for p in doc.paragraphs:
+                text += p.text + "\n"
+        else:
+            with open(file_path, "r", errors="ignore") as f:
+                text = f.read()
+    except Exception:
+        return ""
+
+    return text.strip()

 # =============================
 # 4) Build FAISS Index
 # =============================
-def build_faiss(text
-    if not text
+def build_faiss(text, chunk_size=500, overlap=50):
+    if not text:
         return None, None

     chunks = []
-    step =
+    step = chunk_size - overlap
+
     for i in range(0, len(text), step):
-        chunk = text[i:i + chunk_size]
-        if chunk
+        chunk = text[i:i + chunk_size].strip()
+        if chunk:
             chunks.append(chunk)

     if not chunks:
         return None, None

-
-
-
-
+    embeds = embedding_model.encode(
+        chunks,
+        convert_to_numpy=True,
+        normalize_embeddings=True
+    )
+
+    index = faiss.IndexFlatIP(embeds.shape[1])
+    index.add(embeds)
+
     return index, chunks

 # =============================
-# 5) Globals
+# 5) Globals
 # =============================
 doc_index = None
 doc_chunks = None
@@ -91,98 +103,84 @@ doc_chunks = None
 # =============================
 # 6) Handlers
 # =============================
-def upload_file(file_path
+def upload_file(file_path):
     global doc_index, doc_chunks
-    if not file_path:
-        return "⚠️ Please upload a file first."
     text = extract_text(file_path)
+
+    if not text:
+        return "❌ No readable text found."
+
     idx, chunks = build_faiss(text)
+
     if idx is None:
-        return "
+        return "❌ Indexing failed."
+
     doc_index, doc_chunks = idx, chunks
-    return f"✅
+    return f"✅ Indexed {len(chunks)} chunks."

-def answer_query(query
-
-
-
-    if doc_index is None
-        return "⚠️
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    inputs = tokenizer(final_prompt, return_tensors="pt", truncation=True).to(llm.device)
-    outputs = llm.generate(**inputs, max_new_tokens=300, temperature=0.7, top_p=0.9, do_sample=True)
-    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    if "Answer:" in answer:
-        answer = answer.split("Answer:")[-1].strip()
-    return answer
-
-def synthesize_with_gtts(text: str, out_path="out.mp3"):
-    tts = gTTS(text=text, lang="en")
-    tts.save(out_path)
-    return out_path
-
-def voice_query(audio_path: str):
-    if not audio_path:
-        return "⚠️ Please record your question.", "", None
-
-
-
-
-    if not recognized:
-        return "⚠️ Could not transcribe audio.", "", None
-
-
-
-
-
-    mp3_path = synthesize_with_gtts(ans, "answer.mp3")
-
-
+def answer_query(query):
+    if not query.strip():
+        return "⚠️ Enter a question."
+
+    if doc_index is None:
+        return "⚠️ Upload a document first."
+
+    q_vec = embedding_model.encode(
+        [query],
+        convert_to_numpy=True,
+        normalize_embeddings=True
+    )
+
+    _, I = doc_index.search(q_vec, k=5)
+    context = "\n".join(doc_chunks[i] for i in I[0])
+
+    prompt = f"""
+Answer using only the context below.
+If not found, say "Not in document".
+
+Context:
+{context}
+
+Question:
+{query}
+"""
+
+    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
+    outputs = llm.generate(**inputs, max_new_tokens=200)
+
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+def voice_query(audio_path):
+    if not audio_path:
+        return "", "", None
+
+    speech = stt_model(audio_path)["text"]
+    answer = answer_query(speech)
+
+    tts = gTTS(answer)
+    tts.save("reply.mp3")
+
+    return speech, answer, "reply.mp3"

 # =============================
-# 7)
+# 7) UI
 # =============================
-with gr.Blocks(
-    gr.Markdown("# 📚 RAG Chatbot
-
-
-
-
-
-
-
-
-
-
-
-
-
-    gr.Markdown("### 🎤 Voice Chat")
-    mic_input = gr.Audio(sources=["microphone"], type="filepath", label="🎙️ Speak your question")
-    rec_text = gr.Textbox(label="📝 Recognized Speech", interactive=False)
-    v_answer = gr.Textbox(label="💡 Answer (voice)", lines=8)
-    v_audio = gr.Audio(label="🔊 Bot Voice Reply")
-
-    # Bind events
-    upload_btn.click(fn=upload_file, inputs=file_input, outputs=status)
-    ask_btn.click(fn=answer_query, inputs=query, outputs=answer)
-    mic_input.change(fn=voice_query, inputs=mic_input, outputs=[rec_text, v_answer, v_audio])
+with gr.Blocks() as demo:
+    gr.Markdown("# 📚 RAG Chatbot with Voice")
+
+    file = gr.File(type="filepath")
+    status = gr.Textbox()
+    gr.Button("Index").click(upload_file, file, status)
+
+    query = gr.Textbox(label="Question")
+    answer = gr.Textbox()
+    gr.Button("Ask").click(answer_query, query, answer)
+
+    audio = gr.Audio(type="filepath")
+    rec = gr.Textbox()
+    v_ans = gr.Textbox()
+    v_audio = gr.Audio()
+    audio.change(voice_query, audio, [rec, v_ans, v_audio])

 demo.launch()
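
One practical note, since the Space is flagged "Runtime error": every module imported at the top of app.py must also be declared as a dependency of the Space. A minimal requirements.txt sketch (an assumption, not part of this diff; the fitz module ships in the PyMuPDF package and the docx module in python-docx):

gradio
PyMuPDF                # imported as fitz
python-docx            # imported as docx
faiss-cpu
numpy
torch
sentence-transformers
transformers
gTTS
huggingface_hub

The Whisper pipeline also decodes audio files through the ffmpeg binary, so the Space may additionally need ffmpeg listed in packages.txt.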
|
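For completeness, a standalone sketch of the retrieval path this version settles on (normalized embeddings in an inner-product index, so scores are cosine similarities). The toy text and question are placeholders, not from the Space:

import faiss
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
text = "FAISS stores embeddings for similarity search. " * 50  # toy document

# Same chunking scheme as build_faiss: 500-char windows, 50-char overlap.
chunks = [text[i:i + 500].strip() for i in range(0, len(text), 450)]

# With normalize_embeddings=True, inner product equals cosine similarity.
embeds = model.encode(chunks, convert_to_numpy=True, normalize_embeddings=True)
index = faiss.IndexFlatIP(embeds.shape[1])
index.add(embeds)

q = model.encode(["What does FAISS store?"], convert_to_numpy=True,
                 normalize_embeddings=True)
scores, ids = index.search(q, k=2)
print(scores[0], [chunks[i][:40] for i in ids[0]])  # top-2 chunks and scores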