Spaces:
Sleeping
Sleeping
| import fitz | |
| import tempfile | |
| import gradio as gr | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from langchain.docstore.document import Document | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.chains import RetrievalQA | |
| from langchain_community.llms import HuggingFacePipeline | |
| from transformers import pipeline | |
# Load and chunk PDF
def load_pdf_chunks(file_path, chunk_size=500, chunk_overlap=50):
    """Extract the text of a PDF and split it into chunk documents.

    Args:
        file_path: Path of the PDF, opened with PyMuPDF (``fitz``).
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter``.

    Returns:
        A list of ``Document`` objects, one per non-blank chunk, each carrying
        ``{"source": file_path}`` as metadata. The list is empty when the PDF
        yields no non-whitespace text.
    """
    # Use the document as a context manager so the underlying file handle is
    # released even if text extraction fails part-way through.
    with fitz.open(file_path) as doc:
        text = "\n".join(page.get_text() for page in doc)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return [
        Document(page_content=chunk, metadata={"source": file_path})
        for chunk in splitter.split_text(text)
        if chunk.strip()  # drop whitespace-only chunks
    ]
# Setup RAG pipeline
def setup_rag(documents):
    """Build a ``RetrievalQA`` chain over ``documents``.

    The documents are embedded with the ``all-MiniLM-L6-v2`` model and stored
    in a FAISS vector store. Retrieval uses ``search_type="mmr"`` with
    ``k=4``, ``fetch_k=8`` and ``lambda_mult=0.5``. Answers are generated by a
    ``text2text-generation`` pipeline running ``google/flan-t5-base`` with
    ``max_length=128``.

    Args:
        documents: Documents to index.

    Returns:
        The chain, configured with ``return_source_documents=True``.
    """
    embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    index = FAISS.from_documents(documents, embedder)

    mmr_settings = {"k": 4, "fetch_k": 8, "lambda_mult": 0.5}
    mmr_retriever = index.as_retriever(
        search_type="mmr",
        search_kwargs=mmr_settings,
    )

    generator = pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
        max_length=128,
    )
    return RetrievalQA.from_chain_type(
        llm=HuggingFacePipeline(pipeline=generator),
        retriever=mmr_retriever,
        return_source_documents=True,
    )
# Global RAG chain (updated on upload)
qa_chain = None


def upload_pdf(file):
    """Index the uploaded PDF and install the resulting chain as ``qa_chain``.

    Args:
        file: Value delivered by the upload component: either an object
            exposing the file path as ``.name`` or a plain path string.
            ``None`` means nothing was selected.

    Returns:
        A human-readable status message for the UI.
    """
    global qa_chain

    # Clicking the button with no file selected delivers None; report that
    # instead of failing on ``None.name``.
    if file is None:
        return "Please select a PDF file first."

    # Accept both a file-like wrapper (path in ``.name``) and a bare path.
    pdf_path = getattr(file, "name", file)

    docs = load_pdf_chunks(pdf_path)
    if not docs:
        # No chunks means there is nothing to index; keep any previously
        # built chain rather than passing an empty list to the indexer.
        return "No extractable text found in this PDF."

    qa_chain = setup_rag(docs)
    return "PDF uploaded and indexed!"


def query_rag(question):
    """Answer ``question`` using the chain built by the last upload.

    Args:
        question: The user's question text.

    Returns:
        The chain's ``"result"`` string, or a hint message when no PDF has
        been indexed yet or the question is blank.
    """
    # Read the global once so the None check and the call use the same chain.
    chain = qa_chain
    if chain is None:
        return "Upload a PDF first!"
    if not question or not question.strip():
        return "Please enter a question."

    # ``invoke`` replaces the deprecated ``chain({...})`` call form.
    result = chain.invoke({"query": question})
    return result["result"]
# Gradio UI
# NOTE(review): the original indentation was lost, so the nesting of the
# components inside each gr.Row() below is reconstructed — confirm against
# the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 RAG App with MMR + PDF Upload (Hugging Face Demo)")

    # Upload section: pick a PDF, index it, and show the status message.
    with gr.Row():
        file = gr.File(label="Upload a PDF", file_types=[".pdf"])
        upload_btn = gr.Button("Upload and Index")
    status = gr.Textbox(label="Status")
    upload_btn.click(upload_pdf, inputs=file, outputs=status)

    # Question section: ask about the indexed PDF and show the answer.
    with gr.Row():
        question = gr.Textbox(label="Enter your question")
        answer = gr.Textbox(label="Answer")
    answer_btn = gr.Button("Answer")
    answer_btn.click(query_rag, inputs=question, outputs=answer)

if __name__ == "__main__":
    demo.launch()