Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import tempfile | |
| from dotenv import load_dotenv | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from groq import Groq | |
# ================= ENVIRONMENT =================
load_dotenv()

# Fix: the original passed a literal API key as the *name* of the environment
# variable (os.getenv("gsk_...")), which both leaked the secret into source
# control and always returned None. Read the key from GROQ_API_KEY instead,
# as the UI text and error messages already instruct.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Client is None when the key is missing; groq_llm() guards on this.
client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None

# FAISS index shared across requests; populated by process_pdf().
vector_db = None
# ================= LLM FUNCTION =================
def groq_llm(prompt):
    """Send *prompt* to the Groq chat-completions API and return the reply text.

    Returns a human-readable error string instead of raising when no API
    client was configured at startup.
    """
    # Guard clause: without a configured client there is nothing to call.
    if client is None:
        return "β GROQ API key not set. Set it in Hugging Face Secrets."

    completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama-3.3-70b-versatile",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# ================= PROCESS PDF =================
def process_pdf(file):
    """Load an uploaded PDF, split it into chunks, and build the FAISS index.

    Parameters
    ----------
    file : bytes
        Raw PDF bytes from the Gradio File component.
        NOTE(review): recent Gradio versions default `gr.File` to
        type="filepath" (a str path), in which case `tmp.write(file)` would
        fail — confirm the component's `type` against the Gradio version used.

    Returns
    -------
    str
        Human-readable status message (also shown on validation failure).
    """
    global vector_db
    if file is None:
        return "β Please upload a PDF."

    # PyPDFLoader needs a real filesystem path, so spill the upload to a
    # temporary file first.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(file)
        pdf_path = tmp.name

    try:
        documents = PyPDFLoader(pdf_path).load()
    finally:
        # Fix: the original never removed the delete=False temp file,
        # leaking one PDF copy to disk per upload.
        os.unlink(pdf_path)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
    )
    docs = splitter.split_documents(documents)

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vector_db = FAISS.from_documents(docs, embeddings)
    return f"β PDF processed successfully! {len(docs)} chunks created."
# ================= ASK QUESTION =================
def ask_question(question):
    """Answer *question* using retrieval-augmented generation over the indexed PDF.

    Retrieves the top-3 most similar chunks from the FAISS index, stuffs them
    into a context-restricted prompt, and delegates to groq_llm().

    Returns a human-readable error string when no document has been processed.
    """
    global vector_db
    if vector_db is None:
        return "β Please upload and process a document first."

    retriever = vector_db.as_retriever(search_kwargs={"k": 3})
    # Fix: Retriever.get_relevant_documents() is deprecated in LangChain
    # (retrievers are Runnables since 0.1.46) — use .invoke() instead.
    docs = retriever.invoke(question)
    context = "\n\n".join(doc.page_content for doc in docs)

    prompt = f"""
You are an intelligent assistant.
Answer ONLY using the provided context.
Context:
{context}
Question:
{question}
Answer:
"""
    return groq_llm(prompt)
# ================= GRADIO UI =================
# Top-level Gradio Blocks app: upload/process a PDF, then ask questions.
with gr.Blocks(title="π RAG PDF QA App") as demo:
    gr.Markdown("# π RAG (Retrieval-Augmented Generation) PDF QA")
    gr.Markdown("Upload a PDF and ask questions about its content.")
    # Surface a build-time warning when no API key was found at startup.
    if client is None:
        gr.Markdown("β οΈ GROQ_API_KEY not set. Set it in Hugging Face Secrets to enable answering.")
    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        process_btn = gr.Button("Process PDF")
    status = gr.Textbox(label="Status", interactive=False)
    question = gr.Textbox(label="Ask Question")
    answer = gr.Textbox(label="Answer", lines=10)
    # Wiring: button click builds the FAISS index; pressing Enter in the
    # question box runs the RAG pipeline and fills the answer box.
    process_btn.click(process_pdf, inputs=pdf_upload, outputs=status)
    question.submit(ask_question, inputs=question, outputs=answer)

demo.launch()