ogflash committed on
Commit
3b4d89a
·
verified ·
1 Parent(s): e86869c

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +112 -0
  2. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
+ from langchain_community.embeddings import HuggingFaceEmbeddings
4
+ from langchain_community.vectorstores import Chroma
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.document_loaders import PyPDFLoader
7
+ from langchain.chains import RetrievalQA
8
+ from langchain.llms.base import LLM
9
+
10
+ from typing import List, Optional
11
+ from groq import Groq
12
+ import gradio as gr
13
+ import os
14
+ import uuid
15
+
16
+ # ✅ Groq LLM Wrapper
17
class GroqLLM(LLM):
    """LangChain ``LLM`` adapter that answers prompts via Groq chat completions.

    Every call sends a fixed system message plus the prompt as the user
    message and returns the text of the first returned choice.
    """

    # Groq model identifier sent with every request.
    model: str = "llama3-8b-8192"
    # Read from the environment once, when the class body is executed;
    # ``os.environ.get`` yields None when GROQ_API_KEY is unset.
    api_key: Optional[str] = os.environ.get("GROQ_API_KEY")
    # Sampling temperature forwarded to the API.
    temperature: float = 0.0

    def _call(self, prompt: str, stop: Optional[List[str]] = None, run_manager=None, **kwargs) -> str:
        """Send ``prompt`` to Groq and return the completion text.

        Args:
            prompt: Text sent as the user message.
            stop: Optional stop sequences; forwarded to the API when given.
            run_manager: Accepted for compatibility with the base-class call
                signature; not used here.
            **kwargs: Accepted for the same reason; not forwarded to Groq.

        Returns:
            The content of the first choice, or ``""`` if the API returned
            no content.
        """
        client = Groq(api_key=self.api_key)
        request = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
            "temperature": self.temperature,
        }
        # Only send ``stop`` when the caller supplied sequences, so the
        # request is unchanged for callers that never use it.
        if stop:
            request["stop"] = stop
        response = client.chat.completions.create(**request)
        # ``content`` may be None; keep the declared ``str`` return type.
        return response.choices[0].message.content or ""

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses to label this LLM implementation."""
        return "groq-llm"
38
+
39
+ # ✅ Session Store
40
# Module-level store for the active QA chain ("qa_chain") and the directory
# holding its vector index ("temp_dir"); starts out empty.
session_store = dict()
41
+
42
+ # ✅ Process PDF File
43
def process_pdf_and_setup_chain(pdf_file):
    """Index an uploaded PDF and store a retrieval-QA chain for later questions.

    The PDF is loaded with ``PyPDFLoader``, split into 500-character chunks
    with 50 characters of overlap, embedded with ``all-MiniLM-L6-v2`` into a
    Chroma store persisted under a fresh ``temp_<hex>`` directory, and wrapped
    in a ``RetrievalQA`` chain saved as ``session_store["qa_chain"]``.

    Args:
        pdf_file: The upload as delivered by the UI: either a path string or
            an object whose ``name`` attribute is the path. A falsy value
            means nothing was uploaded.

    Returns:
        A human-readable status string. Processing errors are reported in
        the returned string rather than raised.
    """
    import shutil  # local import: only needed for temp-directory cleanup

    if not pdf_file:
        return "❌ No PDF uploaded."

    # Accept both a plain path and a file-like wrapper exposing ``.name``.
    file_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    temp_dir = f"temp_{uuid.uuid4().hex}"
    os.makedirs(temp_dir, exist_ok=True)

    try:
        documents = PyPDFLoader(file_path).load()

        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        docs = splitter.split_documents(documents)

        embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vectorstore = Chroma.from_documents(
            docs,
            embedding,
            persist_directory=os.path.join(temp_dir, "chroma"),
        )

        qa_chain = RetrievalQA.from_chain_type(
            llm=GroqLLM(),
            retriever=vectorstore.as_retriever(),
            return_source_documents=True,
        )

        # Remember the directory being replaced so it can be removed once
        # the new chain is in place.
        previous_dir = session_store.get("temp_dir")
        session_store["qa_chain"] = qa_chain
        session_store["temp_dir"] = temp_dir
    except Exception as e:
        # Do not leave a half-built index directory behind on failure.
        shutil.rmtree(temp_dir, ignore_errors=True)
        return f"❌ Error: {str(e)}"

    # The old index is no longer reachable through session_store; free it.
    if previous_dir and previous_dir != temp_dir:
        shutil.rmtree(previous_dir, ignore_errors=True)

    return "✅ PDF processed! You can now ask questions."
77
+
78
+ # ✅ Answering Function
79
def answer_question(query):
    """Answer ``query`` using the QA chain stored in ``session_store``.

    Args:
        query: The user's question. ``None``, empty, or whitespace-only
            input is treated as "no question entered".

    Returns:
        The chain's ``"result"`` text, or a human-readable message when no
        chain has been set up, no question was entered, or the chain raised.
    """
    qa_chain = session_store.get("qa_chain")
    if qa_chain is None:
        return "❌ Please upload and process a PDF first."
    # Guard against None as well as blank text before calling ``strip``.
    if not query or not query.strip():
        return "❗ Please enter a question."
    try:
        # ``invoke`` replaces the deprecated direct call on the chain object.
        result = qa_chain.invoke({"query": query})
        return result["result"]
    except Exception as e:
        return f"❌ Error: {str(e)}"
90
+
91
+ # ✅ Gradio UI
92
# Build the two-step UI: upload and process a PDF, then ask questions about it.
with gr.Blocks() as demo:
    gr.Markdown("## 📄 PDF Q&A with LangChain + Groq LLaMA3")
    gr.Markdown("Upload a PDF, process it, and ask any question from its content.")

    with gr.Row():
        pdf_input = gr.File(label="📄 Upload PDF", file_types=[".pdf"])
        process_btn = gr.Button("⚙️ Process PDF")

    # Read-only box showing the outcome of the processing step.
    status = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        question = gr.Textbox(label="Ask a question", lines=2, placeholder="e.g. What is the document about?")
        ask_btn = gr.Button("🔍 Ask")

    # Read-only box showing the answer to the latest question.
    answer = gr.Textbox(label="Answer", interactive=False)

    # Event wiring: each button feeds one input to its handler and shows the
    # returned string in the matching output box.
    process_btn.click(fn=process_pdf_and_setup_chain, inputs=pdf_input, outputs=status)
    ask_btn.click(fn=answer_question, inputs=question, outputs=answer)

demo.launch()
112
+
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ langchain-community
3
+ openai
4
+ chromadb
5
+ faiss-cpu
6
+ sentence-transformers
7
+ tiktoken
8
+ groq
9
+ pypdf
10
+ gradio