# app.py — Urdu RAG chatbot (Gradio): the three lines above this comment in the
# original were Hugging Face web-page residue, not Python; replaced with this header.
import os
import tempfile

import fitz  # PyMuPDF
import gradio as gr
from gtts import gTTS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
# ----------------------------------------------------
# 1️⃣ Extract text from PDF
# ----------------------------------------------------
def extract_text_from_pdf(pdf_file):
    """Return the concatenated plain text of every page of *pdf_file*.

    Accepts either a file-like object exposing ``.read()`` (an upload
    stream) or a filesystem path, because ``gr.File`` delivers a filepath
    by default in Gradio 4 — the original only handled the stream case
    and crashed with ``AttributeError`` on a path string.
    """
    if hasattr(pdf_file, "read"):
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    else:
        # gr.File(type="filepath") passes a str (or a wrapper with .name).
        doc = fitz.open(getattr(pdf_file, "name", pdf_file))
    with doc:
        # join() builds the text in one pass instead of quadratic `+=`.
        return "".join(page.get_text("text") for page in doc)
# ----------------------------------------------------
# 2️⃣ Create FAISS Vector Database
# ----------------------------------------------------
def create_vectorstore(text):
    """Index *text* in FAISS for similarity search.

    The text is split into 800-character chunks with 100 characters of
    overlap, embedded with a multilingual sentence-transformers model,
    and returned as a FAISS vector store.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
    chunks = splitter.create_documents([text])
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    return FAISS.from_documents(chunks, embedder)
# ----------------------------------------------------
# 3️⃣ Initialize Google Gemini Model
# ----------------------------------------------------
def get_model():
    """Return a configured Gemini chat model.

    Expects ``GOOGLE_API_KEY`` to be present in the environment, which
    ``ChatGoogleGenerativeAI`` reads directly.  The original re-assigned
    the variable to itself with a ``""`` fallback — a no-op when set and
    actively harmful (clobbers it with an empty string) when not — so
    that line is removed.
    """
    # "gemini-pro" was retired by Google; gemini-1.5-flash is the current
    # lightweight chat model with the same LangChain interface.
    return ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.2)
# ----------------------------------------------------
# 4️⃣ Chatbot Logic
# ----------------------------------------------------
def chat_with_pdf(pdf_file, user_input, history):
    """Answer *user_input* from the uploaded PDF and synthesize Urdu speech.

    Parameters:
        pdf_file: the gr.File upload (None when nothing has been uploaded).
        user_input: the user's question (str).
        history: list of [user, bot] message pairs from gr.Chatbot.

    Returns:
        (history, audio_path) — the updated chat history and the path of
        an MP3 with the spoken answer (None when no PDF was supplied).
    """
    history = history or []
    if pdf_file is None:
        # BUG FIX: this handler is wired to TWO Gradio outputs
        # ([chatbot, audio_output]); the original returned a single value
        # here, which breaks the callback. Return a (history, None) pair.
        return history + [["❌ Please upload a PDF file first.", ""]], None

    # NOTE(review): the index is rebuilt on every question — acceptable for
    # small PDFs; consider caching the vectorstore per file for large ones.
    pdf_text = extract_text_from_pdf(pdf_file)
    vectorstore = create_vectorstore(pdf_text)
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    # `.invoke` replaces the deprecated `get_relevant_documents`.
    docs = retriever.invoke(user_input)
    context = "\n\n".join(d.page_content for d in docs)

    prompt = ChatPromptTemplate.from_template(
        """
You are a helpful Urdu assistant. Answer in Urdu (Roman Urdu is fine if needed).
Context from PDF:
{context}
User Question:
{question}
Give your answer in Urdu (Roman Urdu allowed).
"""
    )
    llm = get_model()
    chain = prompt | llm | StrOutputParser()
    answer = chain.invoke({"context": context, "question": user_input})

    # Unique temp file instead of a fixed "response.mp3": a hard-coded name
    # is clobbered when two users/requests run concurrently.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
        audio_path = fp.name
    gTTS(answer, lang="ur").save(audio_path)

    return history + [[user_input, answer]], audio_path
# ----------------------------------------------------
# 5️⃣ Gradio Interface
# ----------------------------------------------------
# Build the Gradio UI: a file upload, a chatbot pane, a question textbox,
# and an audio player for the spoken (gTTS) answer.
# NOTE(review): the label strings below appear mojibake-encoded in this
# source (e.g. "πŸ“˜"); they are runtime strings and are kept byte-identical.
with gr.Blocks(title="πŸ“˜ Urdu RAG Chatbot") as demo:
    gr.Markdown("## πŸ€– Urdu RAG Chatbot β€” Ask questions from your PDF (Roman Urdu supported)")
    with gr.Row():
        pdf_file = gr.File(label="πŸ“„ Upload your PDF")
    chatbot = gr.Chatbot(label="Chat with your PDF (Urdu / Roman Urdu)")
    user_input = gr.Textbox(label="πŸ’¬ Ask something about your PDF")
    audio_output = gr.Audio(label="πŸ”Š Audio Answer", type="filepath")
    clear = gr.Button("πŸ”„ Clear Chat")
    # Pressing Enter in the textbox runs the RAG pipeline; its two return
    # values feed the chat history and the audio player respectively.
    user_input.submit(chat_with_pdf, [pdf_file, user_input, chatbot], [chatbot, audio_output])
    # Clearing resets only the chatbot component (lambda returns None as its value).
    clear.click(lambda: None, None, chatbot, queue=False)
demo.launch()