HuzaifaTech committed on
Commit
1c1f14a
·
verified ·
1 Parent(s): 534fddc

Create requirements.py

Browse files
Files changed (1) hide show
  1. requirements.py +135 -0
requirements.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import fitz
3
+ import numpy as np
4
+ import faiss
5
+ from sentence_transformers import SentenceTransformer
6
+ from groq import Groq
7
+ import gradio as gr
8
+ import os
9
+
10
+ # =========================
11
+ # 1. LOAD API KEY (HF SECRET)
12
+ # =========================
13
# Read the Groq key from the environment (supplied as a Hugging Face Space
# secret) and build the shared API client that generate_answer() calls.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)
15
+
16
+ # =========================
17
+ # 2. LOAD PDF
18
+ # =========================
19
pdf_url = "https://huggingface.co/datasets/HuzaifaTech/rag_file/resolve/main/Hands_On_Machine_Learning_with_Scikit_Le.pdf"

pdf_path = "file.pdf"

# Download the PDF once and reuse the local copy on later runs.
if not os.path.exists(pdf_path):
    # Stream into a temporary name and rename only after a successful,
    # complete download. Otherwise an HTTP error page or an interrupted
    # transfer would be saved as file.pdf and every later run would treat
    # that broken file as a valid cache.
    tmp_path = pdf_path + ".part"
    with requests.get(pdf_url, stream=True, timeout=60) as response:
        # Fail loudly on 4xx/5xx instead of writing the error body to disk.
        response.raise_for_status()
        with open(tmp_path, "wb") as f:
            for block in response.iter_content(chunk_size=1 << 20):
                f.write(block)
    os.replace(tmp_path, pdf_path)
27
+
28
+ # =========================
29
+ # 3. EXTRACT TEXT
30
+ # =========================
31
# Read the text of every page, then release the PDF handle as soon as
# extraction is done. The per-page strings are joined once instead of being
# accumulated with repeated `+=`.
with fitz.open(pdf_path) as doc:
    text = "".join(page.get_text() for page in doc)
36
+
37
+ # =========================
38
+ # 4. CHUNKING
39
+ # =========================
40
def chunk_text(text, chunk_size=800):
    """Split ``text`` into newline-aligned chunks of roughly ``chunk_size`` chars.

    Lines are accumulated into the current chunk while the running length
    (each line counted together with its newline) plus the next line stays
    below ``chunk_size``; otherwise the current chunk is closed and the line
    starts a new one. Lines are never split, so a single line longer than
    ``chunk_size`` becomes a chunk of its own.

    Args:
        text: The full document text.
        chunk_size: Soft upper bound on chunk length, in characters.

    Returns:
        A list of non-empty chunks with surrounding whitespace stripped.
        Empty or whitespace-only input yields an empty list.
    """
    chunks = []
    pending = []      # lines belonging to the chunk being built
    pending_len = 0   # length of those lines, one newline counted per line

    for para in text.split("\n"):
        if pending and pending_len + len(para) >= chunk_size:
            chunk = "\n".join(pending).strip()
            # Runs of blank lines strip down to nothing; never emit them.
            if chunk:
                chunks.append(chunk)
            pending = []
            pending_len = 0
        # Every line, including the first of a new chunk, keeps its newline
        # separator so adjacent lines are never fused together.
        pending.append(para)
        pending_len += len(para) + 1

    chunk = "\n".join(pending).strip()
    if chunk:
        chunks.append(chunk)

    return chunks
56
+
57
# Chunk the extracted text and keep only the first 300 chunks for indexing.
chunks = chunk_text(text)
chunks = chunks[:300]

# =========================
# 5. EMBEDDINGS
# =========================
model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode all chunks in batches of 32, then L2-normalize the vectors in place.
embeddings = model.encode(chunks, batch_size=32)
faiss.normalize_L2(embeddings)

# =========================
# 6. FAISS
# =========================
# Flat L2 index sized to the embedding width, filled with every chunk vector.
dim = embeddings.shape[1]
index = faiss.IndexFlatL2(dim)
index.add(embeddings)
73
+
74
+ # =========================
75
+ # 7. RETRIEVAL
76
+ # =========================
77
def retrieve(query, k=4):
    """Return up to ``k`` indexed chunks closest to ``query``.

    The query is embedded with the same model as the chunks, L2-normalized,
    and searched against the module-level FAISS index.

    Args:
        query: The user's question as a string.
        k: Maximum number of chunks to return.

    Returns:
        A list of chunk strings in the order FAISS reports them. It may hold
        fewer than ``k`` items when the index contains fewer than ``k``
        vectors.
    """
    q_emb = model.encode([query])
    faiss.normalize_L2(q_emb)
    _, idx = index.search(q_emb, k)
    # FAISS pads missing neighbours with -1; without this filter chunks[-1]
    # would silently hand back the last chunk as if it were a match.
    return [chunks[i] for i in idx[0] if i >= 0]
82
+
83
+ # =========================
84
+ # 8. GENERATION
85
+ # =========================
86
def generate_answer(query):
    """Answer ``query`` using retrieved chunks as the only context.

    The retrieved chunks are joined with blank lines, placed in a
    Context/Question prompt, and sent to the Groq chat completion endpoint
    together with a system message restricting the answer to that context.

    Args:
        query: The user's question.

    Returns:
        The model's reply text, or a string of the form ``"Error: ..."`` if
        the request or the response handling raises.
    """
    context = "\n\n".join(retrieve(query))

    prompt = f"""
Context:
{context}

Question:
{query}
"""

    messages = [
        {
            "role": "system",
            "content": "Answer ONLY from the provided context. If not found, say 'I don't know'.",
        },
        {"role": "user", "content": prompt},
    ]

    try:
        completion = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=messages,
            temperature=0,
            max_tokens=500,
        )
        # Kept inside the try so a malformed response is also reported as an
        # error string rather than raised into the UI.
        return completion.choices[0].message.content
    except Exception as err:
        return f"Error: {err}"
118
+
119
+ # =========================
120
+ # 9. UI (PROFESSIONAL)
121
+ # =========================
122
def chat(message, history):
    """Chat callback: answer ``message``; ``history`` is accepted but unused."""
    reply = generate_answer(message)
    return reply
124
+
125
# Assemble the page: a title, a one-line description, and a chat interface
# whose messages are handled by chat().
soft_theme = gr.themes.Soft()

with gr.Blocks(theme=soft_theme) as demo:
    gr.Markdown("# 📚 RAG Chatbot (ML Book)")
    gr.Markdown("Ask questions from *Hands-On Machine Learning* PDF")

    # Components are created inside the Blocks context, in the same order as
    # before, and then handed to the chat interface.
    history_view = gr.Chatbot(height=400)
    question_box = gr.Textbox(placeholder="Ask a question...", container=False)
    chatbot = gr.ChatInterface(
        fn=chat,
        chatbot=history_view,
        textbox=question_box,
    )

# Start the Gradio app.
demo.launch()