# Scraped from a Hugging Face Space page (Space status: Sleeping).
| # app.py | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import Chroma | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.document_loaders import PyPDFLoader | |
| from langchain.chains import RetrievalQA | |
| from langchain.llms.base import LLM | |
| from typing import List, Optional | |
| from groq import Groq | |
| import gradio as gr | |
| import os | |
| import uuid | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
# Groq LLM wrapper: adapts the Groq chat-completions API to LangChain's LLM interface.
class GroqLLM(LLM):
    """LangChain-compatible LLM backed by Groq's chat-completions endpoint."""

    # Model identifier sent to the Groq API.
    model: str = "llama3-8b-8192"
    # Read from the environment so the key is never hard-coded.
    # fix: os.environ.get may return None, so the annotation must allow it.
    api_key: Optional[str] = os.environ.get("GROQ_API_KEY")
    # 0.0 = deterministic output, appropriate for retrieval QA.
    temperature: float = 0.0

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Send `prompt` to Groq and return the assistant's reply text.

        Args:
            prompt: The fully rendered prompt from the chain.
            stop: Optional stop sequences supplied by LangChain.

        Raises:
            ValueError: if no API key is configured.
        """
        if not self.api_key:
            # Fail with a clear message instead of an opaque client error.
            raise ValueError("GROQ_API_KEY is not set.")
        client = Groq(api_key=self.api_key)
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ]
        response = client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=self.temperature,
            stop=stop,  # fix: forward stop sequences instead of silently ignoring them
        )
        return response.choices[0].message.content

    @property
    def _llm_type(self) -> str:
        # fix: LangChain declares _llm_type as a property; without @property,
        # self._llm_type evaluates to a bound method rather than the string.
        return "groq-llm"
# Session store: module-level dict shared by the upload and question handlers.
# Populated by process_pdf_and_setup_chain with:
#   "qa_chain" — the RetrievalQA chain built from the uploaded PDF
#   "temp_dir" — the per-upload directory holding the Chroma index
# NOTE(review): this is global state shared by ALL Gradio users, not per-session
# — confirm that is intended for this deployment.
session_store = {}
# Process PDF File
def process_pdf_and_setup_chain(pdf_file):
    """Load a PDF, index it into Chroma, and stash a RetrievalQA chain.

    Args:
        pdf_file: Gradio file object exposing the upload path via ``.name``,
            or None/falsy when nothing was uploaded.

    Returns:
        A human-readable status string for the UI.
    """
    if not pdf_file:
        return "β No PDF uploaded."

    import shutil  # local import: only needed here, for temp-dir cleanup

    file_path = pdf_file.name
    # Unique per-upload directory for the Chroma index.
    temp_dir = f"temp_{uuid.uuid4().hex}"
    os.makedirs(temp_dir, exist_ok=True)
    try:
        loader = PyPDFLoader(file_path)
        documents = loader.load()
        # Overlapping chunks so answers can span chunk boundaries.
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        docs = splitter.split_documents(documents)
        embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vectorstore = Chroma.from_documents(
            docs, embedding, persist_directory=os.path.join(temp_dir, "chroma")
        )
        retriever = vectorstore.as_retriever()
        groq_llm = GroqLLM()
        qa_chain = RetrievalQA.from_chain_type(
            llm=groq_llm,
            retriever=retriever,
            return_source_documents=True
        )
        # fix: remove the previous upload's temp dir so repeated uploads
        # don't leak one directory (plus index) per PDF.
        old_dir = session_store.get("temp_dir")
        if old_dir and old_dir != temp_dir:
            shutil.rmtree(old_dir, ignore_errors=True)
        session_store["qa_chain"] = qa_chain
        session_store["temp_dir"] = temp_dir
        return "β PDF processed! You can now ask questions."
    except Exception as e:
        # fix: don't leave an orphaned temp dir behind when indexing fails.
        shutil.rmtree(temp_dir, ignore_errors=True)
        return f"β Error: {str(e)}"
# Answering Function
def answer_question(query):
    """Answer `query` against the currently loaded PDF, or say why we can't."""
    chain = session_store.get("qa_chain")
    # Guard clauses: no indexed PDF yet, or a blank question.
    if not chain:
        return "β Please upload and process a PDF first."
    if not query.strip():
        return "β Please enter a question."
    try:
        output = chain({"query": query})
    except Exception as e:
        return f"β Error: {str(e)}"
    return output["result"]
# Gradio UI
with gr.Blocks() as demo:
    # Page header.
    gr.Markdown("## π PDF Q&A with LangChain + Groq LLaMA3")
    gr.Markdown("Upload a PDF, process it, and ask any question from its content.")

    # Upload + processing controls.
    with gr.Row():
        pdf_input = gr.File(label="π Upload PDF", file_types=[".pdf"])
        process_btn = gr.Button("βοΈ Process PDF")
    status = gr.Textbox(label="Status", interactive=False)

    # Question entry + answer display.
    with gr.Row():
        question = gr.Textbox(
            label="Ask a question",
            lines=2,
            placeholder="e.g. What is the document about?",
        )
        ask_btn = gr.Button("π Ask")
    answer = gr.Textbox(label="Answer", interactive=False)

    # Wire buttons to their handlers.
    process_btn.click(fn=process_pdf_and_setup_chain, inputs=pdf_input, outputs=status)
    ask_btn.click(fn=answer_question, inputs=question, outputs=answer)

demo.launch()