sreejang committed on
Commit
7acbb1a
·
verified ·
1 Parent(s): 02b5621

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -0
app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import gradio as gr
4
+ import faiss
5
+ import numpy as np
6
+
7
+ from groq import Groq
8
+ from pypdf import PdfReader
9
+ from sentence_transformers import SentenceTransformer
10
+ from dotenv import load_dotenv
11
+
12
+ # =====================================
13
+ # LOAD ENV VARIABLES
14
+ # =====================================
15
# Load settings from a local .env file (when present) before reading the key.
load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# Shared Groq client used for every chat completion request in this app.
client = Groq(api_key=GROQ_API_KEY)
18
+
19
+ # =====================================
20
+ # CONFIG
21
+ # =====================================
22
# Chat model name passed to the Groq completion call.
GROQ_MODEL = "llama-3.3-70b-versatile"
# Sentence-transformers model used to embed both document chunks and queries.
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# Put your public Google Drive links here
PDF_LINKS = [
    "https://drive.google.com/file/d/1J89BeL4W-LpwaE4hY_81qPxw23DdXfqJ/view?usp=sharing",
]
29
+
30
+ # =====================================
31
+ # DOWNLOAD PDF FROM GOOGLE DRIVE
32
+ # =====================================
33
def download_pdf_from_drive(link, save_path, timeout=30):
    """Download a publicly shared Google Drive file and write it to disk.

    Args:
        link: Share URL containing a ``/d/<file_id>/`` segment, e.g.
            ``https://drive.google.com/file/d/<file_id>/view``.
        save_path: Local path the downloaded bytes are written to.
        timeout: Seconds to wait on the server before giving up.

    Raises:
        ValueError: If ``link`` has no ``/d/<file_id>`` segment.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    _, marker, tail = link.partition("/d/")
    file_id = tail.split("/")[0]
    if not marker or not file_id:
        raise ValueError(f"Not a Google Drive file link: {link!r}")

    download_url = f"https://drive.google.com/uc?id={file_id}"
    # Without a timeout a stalled connection would block start-up indefinitely.
    response = requests.get(download_url, timeout=timeout)
    # Stop on HTTP errors rather than saving an error body under a .pdf name.
    response.raise_for_status()
    with open(save_path, "wb") as f:
        f.write(response.content)
39
+
40
+ # =====================================
41
+ # LOAD PDF TEXT
42
+ # =====================================
43
def load_pdf_text(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path``, concatenated.

    Pages whose ``extract_text()`` result is falsy contribute an empty string,
    and no separator is inserted between pages.
    """
    pages = PdfReader(pdf_path).pages
    return "".join(page.extract_text() or "" for page in pages)
49
+
50
+ # =====================================
51
+ # CHUNK TEXT
52
+ # =====================================
53
def chunk_text(text, chunk_size=500, overlap=100):
    """Split ``text`` into fixed-size character windows that overlap.

    Consecutive windows start ``chunk_size - overlap`` characters apart, so
    each window repeats the last ``overlap`` characters of the previous one.
    The final window may be shorter than ``chunk_size``.

    Args:
        text: String to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Characters shared by neighbouring chunks; must be smaller
            than ``chunk_size``.

    Returns:
        List of chunk strings in document order; empty for empty ``text``.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if overlap >= chunk_size:
        # A non-positive step would never advance through the text.
        raise ValueError(
            f"overlap ({overlap}) must be smaller than chunk_size ({chunk_size})"
        )

    step = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]
61
+
62
+ # =====================================
63
+ # BUILD VECTOR DATABASE
64
+ # =====================================
65
# Text chunks from every configured PDF; position i matches vector i in `index`.
all_chunks = []
os.makedirs("data", exist_ok=True)

for i, link in enumerate(PDF_LINKS):
    pdf_path = f"data/doc_{i}.pdf"
    download_pdf_from_drive(link, pdf_path)
    text = load_pdf_text(pdf_path)
    chunks = chunk_text(text)
    all_chunks.extend(chunks)

if not all_chunks:
    # With nothing to embed, the shape lookup below would fail with an
    # IndexError that says nothing about the real cause.
    raise RuntimeError("No text could be extracted from the configured PDF_LINKS")

embed_model = SentenceTransformer(EMBED_MODEL)
embeddings = embed_model.encode(all_chunks)
# The index is fed a contiguous float32 matrix, one row per chunk.
embeddings = np.ascontiguousarray(embeddings, dtype="float32")

dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)
82
+
83
+ # =====================================
84
+ # RAG FUNCTION
85
+ # =====================================
86
def answer_question(question):
    """Answer ``question`` using the closest indexed PDF chunks as context.

    The question is embedded with ``embed_model``, up to three nearest chunks
    are looked up in ``index``, and the resulting prompt is sent to the Groq
    chat model named by ``GROQ_MODEL``.

    Args:
        question: The user's question as plain text.

    Returns:
        The model's reply text, or a short hint when ``question`` is empty.
    """
    if not question or not question.strip():
        # Nothing to search for; skip the embedding and LLM round-trips.
        return "Please enter a question."

    query_embedding = embed_model.encode([question])
    query_embedding = np.array(query_embedding).astype("float32")

    _, indices = index.search(query_embedding, 3)

    # FAISS pads the result with -1 when the index holds fewer vectors than
    # requested; all_chunks[-1] would silently repeat the last chunk.
    context = "".join(all_chunks[idx] + "\n\n" for idx in indices[0] if idx >= 0)

    prompt = f"""
Use the context below to answer the question.
If answer is not in the context, say you don't know.

Context:
{context}

Question:
{question}
"""

    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=GROQ_MODEL,
    )

    return response.choices[0].message.content
113
+
114
+ # =====================================
115
+ # GRADIO UI
116
+ # =====================================
117
# Page styling: dark background, centred white title, gradient buttons and
# rounded text areas.
custom_css = """
body { background: #0f172a; }
h1 { text-align: center; color: white; }
button {
background: linear-gradient(90deg,#6366f1,#8b5cf6);
color: white !important;
border-radius: 10px !important;
}
textarea { border-radius: 12px !important; }
"""

# Single-page layout: question box, submit button, answer box.
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("# 📚 Public Google Drive RAG")

    question = gr.Textbox(label="Ask your question", placeholder="Type your question...", lines=3)
    ask_btn = gr.Button("Ask Question")
    answer = gr.Textbox(label="Answer", lines=10)

    # Clicking the button sends the question text through the RAG pipeline
    # and shows the returned string in the answer box.
    ask_btn.click(fn=answer_question, inputs=question, outputs=answer)

demo.launch()