Spaces:

HuzaifaTech
/

ML_Tutor

Sleeping

App Files Files Community

HuzaifaTech committed on Apr 22

Commit

1c1f14a

verified ·

1 Parent(s): 534fddc

Create requirements.py

Browse files

Files changed (1) hide show

requirements.py +135 -0

requirements.py ADDED Viewed

	@@ -0,0 +1,135 @@

+import requests
+import fitz
+import numpy as np
+import faiss
+from sentence_transformers import SentenceTransformer
+from groq import Groq
+import gradio as gr
+import os
+# =========================
+# 1. LOAD API KEY (HF SECRET)
+# =========================
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+client = Groq(api_key=GROQ_API_KEY)
+# =========================
+# 2. LOAD PDF
+# =========================
+pdf_url = "https://huggingface.co/datasets/HuzaifaTech/rag_file/resolve/main/Hands_On_Machine_Learning_with_Scikit_Le.pdf"
+pdf_path = "file.pdf"
+if not os.path.exists(pdf_path):
+    response = requests.get(pdf_url)
+    with open(pdf_path, "wb") as f:
+        f.write(response.content)
+# =========================
+# 3. EXTRACT TEXT
+# =========================
+doc = fitz.open(pdf_path)
+text = ""
+for page in doc:
+    text += page.get_text()
+# =========================
+# 4. CHUNKING
+# =========================
+def chunk_text(text, chunk_size=800):
+    paragraphs = text.split("\n")
+    chunks = []
+    current = ""
+    for para in paragraphs:
+        if len(current) + len(para) < chunk_size:
+            current += para + "\n"
+        else:
+            chunks.append(current.strip())
+            current = para
+    if current:
+        chunks.append(current.strip())
+    return chunks
+chunks = chunk_text(text)[:300]
+# =========================
+# 5. EMBEDDINGS
+# =========================
+model = SentenceTransformer("all-MiniLM-L6-v2")
+embeddings = model.encode(chunks, batch_size=32)
+faiss.normalize_L2(embeddings)
+# =========================
+# 6. FAISS
+# =========================
+dim = embeddings.shape[1]
+index = faiss.IndexFlatL2(dim)
+index.add(embeddings)
+# =========================
+# 7. RETRIEVAL
+# =========================
+def retrieve(query, k=4):
+    q_emb = model.encode([query])
+    faiss.normalize_L2(q_emb)
+    _, idx = index.search(q_emb, k)
+    return [chunks[i] for i in idx[0]]
+# =========================
+# 8. GENERATION
+# =========================
+def generate_answer(query):
+    docs = retrieve(query)
+    context = "\n\n".join(docs)
+    prompt = f"""
+Context:
+{context}
+Question:
+{query}
+"""
+    try:
+        res = client.chat.completions.create(
+            model="llama-3.3-70b-versatile",
+            messages=[
+                {
+                    "role": "system",
+                    "content": "Answer ONLY from the provided context. If not found, say 'I don't know'."
+                },
+                {
+                    "role": "user",
+                    "content": prompt
+                }
+            ],
+            temperature=0,
+            max_tokens=500
+        )
+        return res.choices[0].message.content
+    except Exception as e:
+        return f"Error: {str(e)}"
+# =========================
+# 9. UI (PROFESSIONAL)
+# =========================
+def chat(message, history):
+    return generate_answer(message)
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 📚 RAG Chatbot (ML Book)")
+    gr.Markdown("Ask questions from *Hands-On Machine Learning* PDF")
+    chatbot = gr.ChatInterface(
+        fn=chat,
+        chatbot=gr.Chatbot(height=400),
+        textbox=gr.Textbox(placeholder="Ask a question...", container=False),
+    )
+demo.launch()