Spaces:

Rishitha3
/

HyDE

Runtime error

App Files Files Community

HyDE / app.py

Rishitha3

Update app.py

1c493e6 verified 5 months ago

raw

history blame contribute delete

6.4 kB

	import os
	import gradio as gr
	import fitz # PyMuPDF for PDFs
	import docx
	import faiss
	import numpy as np
	import torch
	from sentence_transformers import SentenceTransformer
	from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
	from gtts import gTTS
	from huggingface_hub import login

	# =============================
	# 1) Auth & Config
	# =============================
	# Hugging Face access token, read from the environment. It is passed to
	# login() and to every from_pretrained()/pipeline() call further down.
	HF_TOKEN = os.getenv("HF_TOKEN")
	# Reject an empty value as well as a missing one: `HF_TOKEN=""` is just as
	# unusable and would otherwise only fail later, with a less helpful error.
	if not HF_TOKEN:
	    raise ValueError("⚠️ Please set your HF_TOKEN as an environment variable.")

	# Hub identifiers of the models loaded at start-up.
	EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"  # chunk/query embeddings
	LLM_MODEL_ID = "meta-llama/Llama-3.2-3b-instruct"  # answer generation
	ASR_MODEL_ID = "openai/whisper-small"  # speech-to-text

	# =============================
	# 2) Load Models
	# =============================
	# Sentence-embedding model; encodes both document chunks and user queries.
	embedding_model = SentenceTransformer(EMBED_MODEL_ID)

	# Authenticate against the Hub, then load the tokenizer and the causal LM.
	login(HF_TOKEN)
	tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_ID, token=HF_TOKEN)
	_llm_load_options = {
	    "device_map": "auto",
	    "torch_dtype": torch.float16,
	    "token": HF_TOKEN,
	}
	llm = AutoModelForCausalLM.from_pretrained(LLM_MODEL_ID, **_llm_load_options)

	# Whisper (speech-to-text)
	stt_model = pipeline(
	    "automatic-speech-recognition",
	    model=ASR_MODEL_ID,
	    token=HF_TOKEN,
	)

	# =============================
	# 3) File Text Extraction
	# =============================
	def extract_text(file_path: str) -> str:
	    """Return the plain text contained in the file at *file_path*.

	    The reader is chosen from the lower-cased file extension: ``.pdf`` is
	    read page by page with PyMuPDF, ``.docx`` paragraph by paragraph with
	    python-docx, and any other file is read as bytes and decoded as UTF-8
	    with undecodable bytes dropped.

	    Args:
	        file_path: Path of the file to read. A falsy value yields ``""``.

	    Returns:
	        The extracted text. For DOCX input every paragraph is followed by
	        a newline.
	    """
	    if not file_path:
	        return ""
	    _, ext = os.path.splitext(file_path.lower())
	    if ext == ".pdf":
	        # Open as a context manager so the PDF handle is released even if a
	        # page fails to extract (the document was previously never closed).
	        with fitz.open(file_path) as pdf:
	            return "".join(page.get_text("text") for page in pdf)
	    if ext == ".docx":
	        paragraphs = docx.Document(file_path).paragraphs
	        return "".join(f"{para.text}\n" for para in paragraphs)
	    # Fallback: treat everything else as UTF-8 text, ignoring bad bytes.
	    with open(file_path, "rb") as f:
	        return f.read().decode("utf-8", errors="ignore")

	# =============================
	# 4) Build FAISS Index
	# =============================
	def build_faiss(text: str, chunk_size=500, overlap=50):
	    """Split *text* into overlapping chunks and index their embeddings.

	    Args:
	        text: Document text to index.
	        chunk_size: Maximum number of characters per chunk.
	        overlap: Number of characters consecutive chunks share. The window
	            advances by ``chunk_size - overlap`` characters, but never by
	            less than 1.

	    Returns:
	        ``(index, chunks)``: a FAISS inner-product index over the
	        normalized chunk embeddings (so scores are cosine similarities)
	        and the chunk strings in the order they were added. Returns
	        ``(None, None)`` when *text* yields no non-blank chunk.
	    """
	    if not text.strip():
	        return None, None

	    chunks = _split_into_chunks(text, chunk_size, overlap)
	    if not chunks:
	        return None, None

	    embeddings = embedding_model.encode(chunks, convert_to_numpy=True, normalize_embeddings=True)
	    index = faiss.IndexFlatIP(embeddings.shape[1])
	    index.add(embeddings)
	    return index, chunks


	def _split_into_chunks(text, chunk_size, overlap):
	    """Return the non-blank sliding-window chunks of *text*."""
	    step = max(1, chunk_size - overlap)
	    chunks = []
	    for start in range(0, len(text), step):
	        chunk = text[start:start + chunk_size]
	        if chunk.strip():
	            chunks.append(chunk)
	        # Once a window reaches the end of the text, any later window would
	        # lie entirely inside it and only add a duplicate tail to the index.
	        if start + chunk_size >= len(text):
	            break
	    return chunks

	# =============================
	# 5) Globals (indexed docs)
	# =============================
	# Index and chunk list of the currently loaded document; both stay None
	# until upload_file() stores a successfully built index.
	doc_index, doc_chunks = None, None

	# =============================
	# 6) Handlers
	# =============================
	def upload_file(file_path: str):
	    """Extract, chunk and index the uploaded file; return a status message.

	    On success the module-level ``doc_index`` and ``doc_chunks`` are
	    replaced with the new index and its chunks. They are left untouched
	    when no file was given or the file produced nothing to index.

	    Args:
	        file_path: Path of the uploaded file.

	    Returns:
	        A status string describing the outcome.
	    """
	    global doc_index, doc_chunks
	    if not file_path:
	        return "⚠️ Please upload a file first."

	    new_index, new_chunks = build_faiss(extract_text(file_path))
	    if new_index is None:
	        return "⚠️ Could not index: file appears empty."

	    doc_index = new_index
	    doc_chunks = new_chunks
	    return f"✅ Document indexed! {len(new_chunks)} chunks ready."

	def answer_query(query: str):
	    """Answer *query* using the currently indexed document as context.

	    Retrieves up to five chunks most similar to the query from the
	    module-level index, asks the LLM to answer from that context only,
	    and returns the generated text.

	    Args:
	        query: The user's question.

	    Returns:
	        The model's answer, or a warning string when the query is blank
	        or no document has been indexed yet.
	    """
	    global doc_index, doc_chunks
	    if not query or not query.strip():
	        return "⚠️ Please enter a question."
	    if doc_index is None or not doc_chunks:
	        return "⚠️ Please upload and index a document first."

	    # ---- Retrieve context ----
	    q_vec = embedding_model.encode([query], convert_to_numpy=True, normalize_embeddings=True)
	    _, hits = doc_index.search(q_vec, k=min(5, len(doc_chunks)))
	    # Defensive: skip any position that does not map to a stored chunk.
	    context = "\n".join(doc_chunks[i] for i in hits[0] if 0 <= i < len(doc_chunks))

	    # ---- Final Answer ----
	    # Use the tokenizer's own chat template rather than hand-written
	    # [INST] tags, which belong to a different model family's prompt format.
	    messages = [
	        {
	            "role": "system",
	            "content": (
	                "You are a helpful tutor. Based only on the context below, "
	                "answer the question.\n"
	                'If not in context, say "I could not find this in the text."'
	            ),
	        },
	        {"role": "user", "content": f"Context:\n{context}\nQuestion: {query}"},
	    ]
	    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
	    # The rendered template normally carries its own special tokens, so do
	    # not let the tokenizer add a second set.
	    inputs = tokenizer(
	        prompt, return_tensors="pt", truncation=True, add_special_tokens=False
	    ).to(llm.device)
	    outputs = llm.generate(**inputs, max_new_tokens=300, temperature=0.7, top_p=0.9, do_sample=True)

	    # generate() returns prompt + completion; decode only the completion.
	    # Splitting the full text on "Answer:" picked the wrong span whenever
	    # the context or the generated text itself contained that marker.
	    prompt_len = inputs["input_ids"].shape[1]
	    return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

	def synthesize_with_gtts(text: str, out_path="out.mp3"):
	    """Render *text* as English speech with gTTS and save it to *out_path*.

	    Args:
	        text: Text to speak.
	        out_path: Destination path of the audio file.

	    Returns:
	        *out_path*, unchanged.
	    """
	    gTTS(text=text, lang="en").save(out_path)
	    return out_path

	def voice_query(audio_path: str):
	    """Transcribe a spoken question, answer it, and speak the answer.

	    Args:
	        audio_path: Path of the recorded audio file.

	    Returns:
	        A 3-tuple ``(recognized_text, answer_text, mp3_path)``. When there
	        is no recording or nothing could be transcribed, the first item is
	        a warning message, the second is ``""`` and the third is ``None``.
	    """
	    if not audio_path:
	        return "⚠️ Please record your question.", "", None

	    # 1) Speech -> Text
	    transcript = stt_model(audio_path).get("text", "").strip()
	    if not transcript:
	        return "⚠️ Could not transcribe audio.", "", None

	    # 2) Answer Query
	    reply = answer_query(transcript)

	    # 3) Text -> Speech
	    return transcript, reply, synthesize_with_gtts(reply, "answer.mp3")

	# =============================
	# 7) Gradio UI
	# =============================
	# Page layout: a header, then one row with an upload column and a chat
	# column, followed by the event wiring for the three handlers.
	with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="cyan")) as demo:
	    gr.Markdown("# 📚 RAG Chatbot + 🎤 Voice (Whisper + gTTS)")
	    gr.Markdown("Upload a PDF/DOCX/TXT and ask by typing or speaking.")

	    with gr.Row():
	        # Left column: document upload and indexing status.
	        with gr.Column(scale=1):
	            file_input = gr.File(label="📂 Upload Document", type="filepath")
	            upload_btn = gr.Button("⚡ Index Document", variant="primary")
	            status = gr.Textbox(label="Status", interactive=False)

	        # Right column: typed chat followed by voice chat.
	        # NOTE(review): the nesting here was reconstructed from flattened
	        # source; confirm the voice widgets belong inside this column.
	        with gr.Column(scale=2):
	            gr.Markdown("### ✍️ Text Chat")
	            query = gr.Textbox(label="❓ Ask a Question", placeholder="e.g., What are the key points?")
	            ask_btn = gr.Button("🚀 Get Answer", variant="primary")
	            answer = gr.Textbox(label="💡 Answer", lines=8)

	            gr.Markdown("### 🎤 Voice Chat")
	            mic_input = gr.Audio(sources=["microphone"], type="filepath", label="🎙️ Speak your question")
	            rec_text = gr.Textbox(label="📝 Recognized Speech", interactive=False)
	            v_answer = gr.Textbox(label="💡 Answer (voice)", lines=8)
	            v_audio = gr.Audio(label="🔊 Bot Voice Reply")

	    # Bind events
	    upload_btn.click(fn=upload_file, inputs=file_input, outputs=status)
	    ask_btn.click(fn=answer_query, inputs=query, outputs=answer)
	    mic_input.change(fn=voice_query, inputs=mic_input, outputs=[rec_text, v_answer, v_audio])

	# Start the server only when executed as a script, so importing this module
	# (e.g. to reuse the handlers) does not block on a running web server.
	if __name__ == "__main__":
	    demo.launch()