import os
import gradio as gr
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from groq import Groq
# ================== LOAD ENV ==================
load_dotenv()

# Read the key from the environment by variable name; never hardcode the secret itself.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("❌ GROQ_API_KEY not found. Please set it in Hugging Face Secrets.")

client = Groq(api_key=GROQ_API_KEY)
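# Note (assumption on the deployment setup): on Hugging Face Spaces the key is
# expected as a Space secret named GROQ_API_KEY in the Space settings; locally,
# a .env file containing a GROQ_API_KEY=... line is picked up by load_dotenv() above.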
# ================== GLOBAL VECTOR DB ==================
vector_db = None

# ================== LLM FUNCTION ==================
def groq_llm(prompt):
    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content
# ================== PDF PROCESSING ==================
def process_pdf(file):
    global vector_db
    if file is None:
        return "❌ Please upload a PDF file."

    # gr.File already saves the upload to disk: newer Gradio passes the path
    # as a string, older versions pass a wrapper object with a .name attribute.
    pdf_path = file if isinstance(file, str) else file.name
    # Load PDF
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()

    # Split text into overlapping chunks
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
    )
    docs = splitter.split_documents(documents)

    # Create embeddings
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # Create vector database
    vector_db = FAISS.from_documents(docs, embeddings)
    return f"✅ Document processed successfully! {len(docs)} chunks created."
# ================== QUESTION ANSWERING ==================
def ask_question(question):
    global vector_db
    if vector_db is None:
        return "❌ Please upload and process a document first."

    # Retrieve the 3 chunks most similar to the question
    retriever = vector_db.as_retriever(search_kwargs={"k": 3})
    docs = retriever.invoke(question)
    context = "\n\n".join([doc.page_content for doc in docs])

    prompt = f"""
You are an intelligent assistant.
Use ONLY the context below to answer the question.

Context:
{context}

Question:
{question}

Answer:
"""
    return groq_llm(prompt)
# ================== GRADIO UI ==================
with gr.Blocks(title="📄 RAG PDF Question Answering App") as demo:
    gr.Markdown("# 📄 RAG PDF Question Answering App")
    gr.Markdown("Upload a PDF and ask questions about it.")

    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        process_btn = gr.Button("📥 Process Document")

    status = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        question = gr.Textbox(label="Ask a Question")
        answer = gr.Textbox(label="Answer", interactive=False)

    process_btn.click(process_pdf, inputs=pdf_upload, outputs=status)
    question.submit(ask_question, inputs=question, outputs=answer)

demo.launch()
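# A likely requirements.txt for this Space, inferred from the imports above
# (exact package names are assumptions and depend on langchain version splits):
#   gradio
#   python-dotenv
#   groq
#   langchain-community
#   langchain-text-splitters
#   langchain-huggingface
#   faiss-cpu
#   pypdf
#   sentence-transformers
# To run locally: set GROQ_API_KEY (via .env or the shell), then `python app.py`.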