muhammadrazapathan committed on
Commit
01b36b2
·
verified ·
1 Parent(s): 86653cd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -0
app.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import tempfile
4
+ from dotenv import load_dotenv
5
+
6
+ from langchain_community.document_loaders import PyPDFLoader
7
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
8
+ from langchain_huggingface import HuggingFaceEmbeddings
9
+ from langchain_community.vectorstores import FAISS
10
+
11
+ from groq import Groq
12
+
13
+ # ================== LOAD ENV ==================
14
# Pull variables from a local .env file (if present) into the process
# environment so the lookup below works both locally and on a hosted Space.
load_dotenv()

# Look the key up by its environment-variable NAME. The secret value itself
# must never appear in source code.
# SECURITY: a literal API key was previously committed on this line (passed as
# the variable name, so the lookup always returned None). Treat that key as
# compromised and rotate it.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

if not GROQ_API_KEY:
    raise ValueError("❌ GROQ_API_KEY not found. Please set it in Hugging Face Secrets.")

# Shared Groq API client used by groq_llm().
client = Groq(api_key=GROQ_API_KEY)

# ================== GLOBAL VECTOR DB ==================
# FAISS index of the most recently processed PDF; None until process_pdf()
# has successfully indexed a document.
vector_db = None
24
+
25
+ # ================== LLM FUNCTION ==================
26
def groq_llm(prompt):
    """Return the model's reply to *prompt*.

    The prompt is sent as a single user message to the module-level Groq
    ``client`` and the text content of the first returned choice is handed
    back to the caller.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
32
+
33
+
34
+ # ================== PDF PROCESSING ==================
35
def process_pdf(file):
    """Index an uploaded PDF into the module-level ``vector_db``.

    Args:
        file: The value delivered by the upload component. May be ``None``
            (nothing uploaded), a filesystem path (``str`` or
            ``os.PathLike``), or an object exposing ``read()`` that returns
            the PDF bytes.

    Returns:
        A human-readable status string for the UI. On success ``vector_db``
        is replaced with a FAISS index built from the document's chunks; on
        any early return it is left unchanged.
    """
    global vector_db

    if file is None:
        return "❌ Please upload a PDF file."

    # NOTE(review): recent Gradio releases appear to hand the upload over as a
    # path string rather than a file object, in which case calling .read() on
    # it fails. Accept both shapes.
    made_temp_copy = False
    if isinstance(file, (str, os.PathLike)):
        pdf_path = os.fspath(file)
    else:
        # File-like upload: spill the bytes to disk because the loader below
        # is given a path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(file.read())
            pdf_path = tmp.name
        made_temp_copy = True

    try:
        # Load PDF
        documents = PyPDFLoader(pdf_path).load()
    finally:
        # Only remove the copy this function created, never the caller's file.
        if made_temp_copy:
            try:
                os.unlink(pdf_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask the
                # outcome of loading the document.
                pass

    # Split text into chunks
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
    )
    docs = splitter.split_documents(documents)

    # A PDF with no extractable text (e.g. scanned images) yields no chunks,
    # and an index cannot be built from an empty list.
    if not docs:
        return "❌ No extractable text was found in this PDF."

    # Create embeddings
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # Create vector database
    vector_db = FAISS.from_documents(docs, embeddings)

    return f"✅ Document processed successfully! {len(docs)} chunks created."
66
+
67
+
68
+ # ================== QUESTION ANSWERING ==================
69
def ask_question(question):
    """Answer *question* using the currently indexed document.

    Args:
        question: The user's question text.

    Returns:
        The model's answer, or a status message when no document has been
        processed yet or the question is blank.
    """
    global vector_db

    if vector_db is None:
        return "❌ Please upload and process a document first."

    # A blank question has nothing to retrieve against; skip the retrieval
    # and the LLM call entirely.
    if not question or not question.strip():
        return "❌ Please enter a question."

    # Fetch the three chunks most similar to the question.
    retriever = vector_db.as_retriever(search_kwargs={"k": 3})
    docs = retriever.invoke(question)

    context = "\n\n".join(doc.page_content for doc in docs)

    # Assembled from explicit lines so the prompt text does not depend on how
    # this function happens to be indented.
    prompt = (
        "\n"
        "You are an intelligent assistant.\n"
        "Use ONLY the context below to answer the question.\n"
        "\n"
        "Context:\n"
        f"{context}\n"
        "\n"
        "Question:\n"
        f"{question}\n"
        "\n"
        "Answer:\n"
    )

    return groq_llm(prompt)
94
+
95
+
96
+ # ================== GRADIO UI ==================
97
# Two-step interface: upload + process a PDF, then ask questions about it.
with gr.Blocks(title="📄 RAG PDF Question Answering App") as demo:
    gr.Markdown("# 📄 RAG PDF Question Answering App")
    gr.Markdown("Upload a PDF and ask questions about it.")

    # Step 1: choose a PDF and index it.
    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        process_btn = gr.Button("📥 Process Document")

    # Shows the message returned by process_pdf().
    status = gr.Textbox(label="Status", interactive=False)

    # Step 2: ask a question; the answer box is read-only.
    with gr.Row():
        question = gr.Textbox(label="Ask a Question")
        answer = gr.Textbox(label="Answer", interactive=False)

    # Event wiring: the button indexes the upload, submitting the question
    # textbox runs retrieval + generation.
    process_btn.click(process_pdf, inputs=pdf_upload, outputs=status)
    question.submit(ask_question, inputs=question, outputs=answer)

demo.launch()