import os
import tempfile

import fitz
import gradio as gr
from gtts import gTTS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
|
| |
| |
| |
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of a PDF.

    Args:
        pdf_file: Either a filesystem path (str / PathLike) or a binary
            file-like object with a ``read()`` method. Gradio's ``gr.File``
            component hands back a filepath string by default in Gradio 4.x,
            so the original ``pdf_file.read()`` call would raise
            AttributeError there; both shapes are now accepted.

    Returns:
        The concatenated text of all pages, in page order.
    """
    if isinstance(pdf_file, (str, os.PathLike)):
        doc = fitz.open(pdf_file)
    else:
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    with doc:
        # Join once instead of quadratic string +=.
        return "".join(page.get_text("text") for page in doc)
|
|
| |
| |
| |
def create_vectorstore(text):
    """Build a FAISS vector index over *text* for similarity retrieval.

    The text is split into ~800-character chunks with 100 characters of
    overlap so retrieved passages stay coherent, then embedded with a
    multilingual MiniLM sentence-transformer (chosen, presumably, to cover
    Urdu / Roman Urdu queries — model name suggests multilingual support).
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
    chunks = splitter.create_documents([text])
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    return FAISS.from_documents(chunks, embedder)
|
|
| |
| |
| |
def get_model():
    """Return a Gemini chat model configured for low-variance answers.

    Returns:
        A ``ChatGoogleGenerativeAI`` instance (model ``gemini-pro``,
        temperature 0.2).

    Raises:
        EnvironmentError: if ``GOOGLE_API_KEY`` is not set. The original
        code wrote ``os.environ["GOOGLE_API_KEY"] = ""`` in that case,
        which clobbers the variable and surfaces later as an opaque auth
        failure; failing fast here gives a clear, actionable message.
    """
    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        raise EnvironmentError("GOOGLE_API_KEY environment variable is not set.")
    return ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.2)
|
|
| |
| |
| |
def chat_with_pdf(pdf_file, user_input, history):
    """Answer a question about the uploaded PDF and voice the reply in Urdu.

    Args:
        pdf_file: The uploaded PDF (filepath or file object from ``gr.File``),
            or None if nothing has been uploaded yet.
        user_input: The user's question.
        history: The chatbot history as a list of [user, assistant] pairs.

    Returns:
        A ``(history, audio_path)`` tuple — the updated chat history and the
        path of an mp3 with the spoken answer (None on the error path).
    """
    if pdf_file is None:
        # BUG FIX: the original returned a single value here, but the submit
        # handler wires TWO outputs (chatbot, audio_output), so Gradio would
        # error. Also show the notice as the assistant's reply rather than
        # putting it in the user slot of the chat pair.
        return history + [[user_input, "β Please upload a PDF file first."]], None

    # NOTE(review): the vector store is rebuilt from scratch on every
    # question; caching it per uploaded file would be a large speedup for
    # big PDFs — confirm desired UX before changing.
    pdf_text = extract_text_from_pdf(pdf_file)
    vectorstore = create_vectorstore(pdf_text)
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    # Top-3 most similar chunks become the grounding context.
    docs = retriever.get_relevant_documents(user_input)
    context = "\n\n".join(d.page_content for d in docs)

    prompt = ChatPromptTemplate.from_template(
        """
You are a helpful Urdu assistant. Answer in Urdu (Roman Urdu is fine if needed).
Context from PDF:
{context}

User Question:
{question}

Give your answer in Urdu (Roman Urdu allowed).
"""
    )

    llm = get_model()
    chain = prompt | llm | StrOutputParser()
    answer = chain.invoke({"context": context, "question": user_input})

    # BUG FIX: the original saved every answer to a shared "response.mp3",
    # which is clobbered when two users (or two quick questions) overlap.
    # Use a unique temp file instead; gr.Audio(type="filepath") accepts it.
    tts = gTTS(answer, lang="ur")
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
        audio_path = fp.name
    tts.save(audio_path)

    # Return a new list instead of mutating the caller's history in place.
    return history + [[user_input, answer]], audio_path
|
|
| |
| |
| |
# --- UI wiring -------------------------------------------------------------
with gr.Blocks(title="π Urdu RAG Chatbot") as demo:
    gr.Markdown("## π€ Urdu RAG Chatbot β Ask questions from your PDF (Roman Urdu supported)")

    with gr.Row():
        pdf_file = gr.File(label="π Upload your PDF")

    chatbot = gr.Chatbot(label="Chat with your PDF (Urdu / Roman Urdu)")
    user_input = gr.Textbox(label="π¬ Ask something about your PDF")
    audio_output = gr.Audio(label="π Audio Answer", type="filepath")

    clear = gr.Button("π Clear Chat")

    user_input.submit(chat_with_pdf, [pdf_file, user_input, chatbot], [chatbot, audio_output])
    # BUG FIX: the original cleared only the chat and left the last audio
    # answer playing/visible; reset both components together.
    clear.click(lambda: (None, None), None, [chatbot, audio_output], queue=False)


# BUG FIX: launch only when run as a script, so importing this module
# (e.g. from tests or another app) no longer starts a server as a side effect.
if __name__ == "__main__":
    demo.launch()
|
|