|
|
|
|
|
|
|
|
import os
import shutil
import uuid
from typing import List, Optional

import gradio as gr
from groq import Groq
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.llms.base import LLM
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
|
|
|
|
|
|
|
|
class GroqLLM(LLM):
    """LangChain ``LLM`` subclass that answers prompts via Groq chat completions.

    Each call sends a fixed system message plus the prompt as the user
    message and returns the text of the first choice.
    """

    # Model identifier sent with every request.
    model: str = "llama3-8b-8192"
    # Read once, when the class body executes. ``os.environ.get`` yields None
    # when the variable is unset, hence Optional.
    # NOTE(review): the variable name looks like a template placeholder, and
    # the Groq client presumably applies its own lookup/validation when the
    # key is None -- confirm both against the deployment.
    api_key: Optional[str] = os.environ.get("YOUR_GROQ_API_KEY")
    # Sampling temperature forwarded to the API.
    temperature: float = 0.0

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Send *prompt* to the chat-completions endpoint and return the reply.

        Args:
            prompt: Text placed in the user message.
            stop: Optional stop sequences; forwarded to the API when provided.

        Returns:
            The content of the first choice, or an empty string when the
            response carries no content.
        """
        client = Groq(api_key=self.api_key)
        request = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
            "temperature": self.temperature,
        }
        # Previously `stop` was accepted but ignored. Only include it when the
        # caller actually supplied sequences, so the default request payload
        # is the same as before.
        if stop:
            request["stop"] = stop
        response = client.chat.completions.create(**request)
        # Normalise a missing content field to "" to honour the `-> str` contract.
        return response.choices[0].message.content or ""

    @property
    def _llm_type(self) -> str:
        """Short identifier for this LLM implementation."""
        return "groq-llm"
|
|
|
|
|
|
|
|
# Module-level state shared by the UI callbacks in this file: the processing
# step writes "qa_chain" and "temp_dir" here, and the question step reads
# "qa_chain" back. Being module-level, it is a single store for the process.
session_store: dict = dict()
|
|
|
|
|
|
|
|
def _build_qa_chain(file_path, persist_dir):
    """Load the PDF at *file_path*, index it under *persist_dir*, return a QA chain.

    Returns None when splitting the loaded document yields no chunks, so the
    caller can report that instead of an opaque vector-store error.
    """
    pages = PyPDFLoader(file_path).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(pages)
    if not chunks:
        return None

    embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectorstore = Chroma.from_documents(
        chunks, embedding, persist_directory=persist_dir
    )
    return RetrievalQA.from_chain_type(
        llm=GroqLLM(),
        retriever=vectorstore.as_retriever(),
        return_source_documents=True,
    )


def process_pdf_and_setup_chain(pdf_file):
    """Index an uploaded PDF and store a retrieval-QA chain for later questions.

    Args:
        pdf_file: The uploaded file. Either an object exposing its path as
            ``.name`` or a plain path string; falsy means nothing was uploaded.

    Returns:
        A user-facing status string describing success or the failure reason.
        On success, ``session_store["qa_chain"]`` and
        ``session_store["temp_dir"]`` are replaced with the new chain and its
        scratch directory.
    """
    if not pdf_file:
        return "❌ No PDF uploaded."

    # Resolve the path inside the function's guarded flow: a plain string has
    # no `.name`, and attribute access used to fail before the try block.
    file_path = getattr(pdf_file, "name", pdf_file)
    temp_dir = f"temp_{uuid.uuid4().hex}"

    try:
        os.makedirs(temp_dir, exist_ok=True)
        qa_chain = _build_qa_chain(file_path, os.path.join(temp_dir, "chroma"))
    except Exception as e:
        # UI boundary: report the failure as text, and do not leave the
        # half-built scratch directory behind.
        shutil.rmtree(temp_dir, ignore_errors=True)
        return f"❌ Error: {str(e)}"

    if qa_chain is None:
        shutil.rmtree(temp_dir, ignore_errors=True)
        return "❌ No extractable text found in the PDF."

    previous_dir = session_store.get("temp_dir")
    session_store["qa_chain"] = qa_chain
    session_store["temp_dir"] = temp_dir

    # Best-effort removal of the directory that backed the chain just replaced;
    # otherwise every upload leaves another index on disk.
    if previous_dir and previous_dir != temp_dir:
        shutil.rmtree(previous_dir, ignore_errors=True)

    return "✅ PDF processed! You can now ask questions."
|
|
|
|
|
|
|
|
def answer_question(query):
    """Answer *query* with the QA chain stored in ``session_store``.

    Args:
        query: Question text. ``None`` or whitespace-only input is treated as
            an empty question.

    Returns:
        The chain's ``"result"`` value, or a user-facing message when no chain
        has been stored yet, the question is empty, or the chain raises.
    """
    qa_chain = session_store.get("qa_chain")
    if qa_chain is None:
        return "❌ Please upload and process a PDF first."

    # `query` may be None; calling .strip() on it directly would raise before
    # reaching the try block below.
    if not (query or "").strip():
        return "❌ Please enter a question."

    try:
        # `invoke` is the supported entry point; calling the chain object
        # directly is deprecated in LangChain.
        result = qa_chain.invoke({"query": query})
        return result["result"]
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing.
        return f"❌ Error: {str(e)}"
|
|
|
|
|
|
|
|
# Gradio front end: one row to upload and process a PDF, a status box, one row
# to ask a question, and an answer box.
# NOTE(review): the leading characters in several labels below ("π", "βοΈ")
# look like mis-decoded emoji; confirm the intended glyphs before changing them.
with gr.Blocks() as demo:
    gr.Markdown(value="## π PDF Q&A with LangChain + Groq LLaMA3")
    gr.Markdown(
        value="Upload a PDF, process it, and ask any question from its content."
    )

    # Upload + trigger for indexing.
    with gr.Row():
        pdf_input = gr.File(
            label="π Upload PDF",
            file_types=[".pdf"],
        )
        process_btn = gr.Button(value="βοΈ Process PDF")

    # Read-only box showing the outcome of processing.
    status = gr.Textbox(
        label="Status",
        interactive=False,
    )

    # Question entry + trigger for answering.
    with gr.Row():
        question = gr.Textbox(
            label="Ask a question",
            lines=2,
            placeholder="e.g. What is the document about?",
        )
        ask_btn = gr.Button(value="π Ask")

    # Read-only box showing the answer text.
    answer = gr.Textbox(
        label="Answer",
        interactive=False,
    )

    # Wire the buttons to their handlers: (callback, input, output).
    process_btn.click(process_pdf_and_setup_chain, pdf_input, status)
    ask_btn.click(answer_question, question, answer)

# Start the web server for the interface.
demo.launch()
|
|
|
|
|
|