# app.py: commit a200b43 (verified), "Create app.py", by muhammadrazapathan
import gradio as gr
import os
import requests
from pypdf import PdfReader
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# ==============================
# GROQ API SETUP
# ==============================
# API key read from the environment once at import time; None when the
# variable is not set (ask_ai reports that case to the user).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Groq's OpenAI-compatible chat-completions endpoint used by ask_ai.
API_URL = "https://api.groq.com/openai/v1/chat/completions"
# ==============================
# MEMORY STORAGE
# ==============================
# Every text chunk ingested so far, in upload order.
documents = []

# TF-IDF matrix produced by fitting `vectorizer` on `documents`;
# None until an upload has been vectorized (and again after a reset).
doc_vectors = None

# Shared vectorizer, refit over the whole library on each upload.
# English stop words are dropped before weighting.
vectorizer = TfidfVectorizer(stop_words="english")
# ==============================
# SAFE PDF READING (HF Compatible)
# ==============================
def extract_text_from_pdf(file_obj):
    """Return the extractable text of every page of a PDF as one string.

    Args:
        file_obj: The uploaded file, passed straight to ``PdfReader``
            (so whatever ``PdfReader`` accepts as its source).

    Returns:
        The text of all pages that yielded any text, joined with newlines.
        An empty string when no page produced text.
    """
    reader = PdfReader(file_obj)
    page_texts = (page.extract_text() for page in reader.pages)
    # Join with a newline rather than concatenating directly: without a
    # separator the last word of one page fuses with the first word of the
    # next, which corrupts the word-based chunks and the TF-IDF tokens.
    return "\n".join(content for content in page_texts if content)
# ==============================
# TEXT CHUNKING
# ==============================
def chunk_text(text, chunk_size=400):
    """Split text into consecutive chunks of whitespace-separated words.

    Args:
        text: The text to split; any run of whitespace separates words.
        chunk_size: Maximum number of words per chunk.

    Returns:
        A list of strings, each holding up to ``chunk_size`` words joined by
        single spaces. Only the final chunk may be shorter. Empty or
        whitespace-only text yields an empty list.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), chunk_size):
        window = tokens[start:start + chunk_size]
        pieces.append(" ".join(window))
    return pieces
# ==============================
# PROCESS MULTIPLE FILES
# ==============================
def upload_files(files):
    """Ingest uploaded PDFs into the in-memory library and refit the index.

    Each file is converted to text, split into chunks, and appended to the
    module-level ``documents`` list. The shared ``vectorizer`` is then refit
    over the whole library and the result stored in ``doc_vectors``.

    The library is only updated after vectorization succeeds, so a failure
    part-way through never leaves ``documents`` and ``doc_vectors`` out of
    sync.

    Args:
        files: The uploaded files (each is handed to
            ``extract_text_from_pdf``); may be ``None`` or empty.

    Returns:
        A human-readable status message for the UI.
    """
    global documents, doc_vectors

    if not files:
        return "⚠️ Please upload files."

    # Broad catch is deliberate: this is a UI handler and any parsing or
    # vectorization failure should surface as a status message, not a crash.
    try:
        new_chunks = []
        for file in files:
            text = extract_text_from_pdf(file)
            if text.strip():
                new_chunks.extend(chunk_text(text))

        # Nothing usable in this upload: say so instead of reporting a
        # "successful" upload that added zero sections.
        if not new_chunks:
            return "❌ No readable text found in PDFs."

        # Work on a candidate list so the globals stay untouched on failure.
        combined = documents + new_chunks
        vectors = vectorizer.fit_transform(combined)
    except Exception as e:
        return f"❌ Error while processing files: {str(e)}"

    # Commit both pieces of state together.
    documents = combined
    doc_vectors = vectors
    return f"✅ Files processed successfully! Added {len(new_chunks)} study sections."
# ==============================
# SEARCH CONTEXT
# ==============================
def retrieve_context(question, top_k=3):
    """Return the library chunks most similar to a question.

    The question is vectorized with the shared ``vectorizer`` and compared
    to every row of ``doc_vectors`` by cosine similarity.

    Args:
        question: The user's question text.
        top_k: Number of highest-scoring chunks to return.

    Returns:
        The selected chunks joined by blank lines, best match first, or
        ``None`` when nothing has been indexed yet.
    """
    # Read-only access to the module globals, so no `global` statement.
    if doc_vectors is None or not documents:
        return None

    q_vec = vectorizer.transform([question])
    similarity = cosine_similarity(q_vec, doc_vectors).flatten()

    # Reverse first, then truncate. Slicing the ascending order with
    # `[-top_k:]` returns *every* chunk for top_k=0, because `[-0:]` is the
    # whole array. For top_k >= 1 the selection and order are unchanged.
    ranked = similarity.argsort()[::-1]
    top_indices = ranked[:top_k]

    return "\n\n".join(documents[i] for i in top_indices)
# ==============================
# GROQ CALL (WITH ERROR HANDLING)
# ==============================
def ask_ai(question):
    """Answer a question using retrieved study material and the Groq API.

    Retrieves the most relevant chunks with ``retrieve_context``, embeds them
    in a prompt, and posts a single-message chat completion to ``API_URL``.

    Args:
        question: The user's question text.

    Returns:
        The model's answer, or a human-readable warning/error message when
        the question is blank, the API key is missing, no material has been
        uploaded, the request fails, or the response is not in the expected
        shape.
    """
    # Reject blank input up front so no retrieval or API call is wasted.
    if not question or not question.strip():
        return "⚠️ Please enter a question."

    if not GROQ_API_KEY:
        return "❌ GROQ_API_KEY not set in Space Secrets."

    context = retrieve_context(question)
    if context is None:
        return "⚠️ Upload study material first."

    prompt = (
        "\nAnswer ONLY using this study material.\n"
        "Material:\n"
        f"{context}\n"
        "Question:\n"
        f"{question}\n"
    )

    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        # Overridable via the GROQ_MODEL environment variable so the model
        # can be changed without a code edit.
        # NOTE(review): confirm the default id is still served by Groq.
        "model": os.environ.get("GROQ_MODEL", "llama3-8b-8192"),
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.2,
    }

    # Only transport-level failures are reported as connection errors.
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    except requests.RequestException as e:
        return f"❌ Connection Error: {str(e)}"

    if response.status_code != 200:
        return f"❌ Groq API Error: {response.text}"

    # A 200 with a non-JSON body or an unexpected schema is a different
    # failure from a connection problem, so it is reported separately.
    try:
        result = response.json()
        return result["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        return f"❌ Unexpected Groq API response: {e!r}"
# ==============================
# RESET LIBRARY
# ==============================
def reset_library():
    """Discard all ingested chunks and the TF-IDF matrix built from them.

    Rebinds the module-level ``documents`` to an empty list and
    ``doc_vectors`` to ``None``, so retrieval reports that nothing has been
    uploaded until new files are processed.

    Returns:
        A status message for the UI.
    """
    global documents, doc_vectors
    documents = []
    doc_vectors = None
    # The original literal was mojibake ("πŸ—‘"): the UTF-8 bytes of the
    # wastebasket emoji decoded with a single-byte codepage.
    return "🗑 Library cleared."
# ==============================
# UI
# ==============================
# Two-tab interface: one tab to build the library, one to query it.
# The emoji and arrows in the labels below were mojibake in the original
# literals (UTF-8 bytes decoded with a single-byte codepage) and are restored.
with gr.Blocks(title="AI StudyHub") as app:
    gr.Markdown("# 🎓 AI StudyHub")
    gr.Markdown("Upload books → Ask questions → AI learns from YOUR material.")

    with gr.Tab("📚 Upload Study Material"):
        file_input = gr.File(file_types=[".pdf"], file_count="multiple")
        upload_btn = gr.Button("Process Files")
        reset_btn = gr.Button("Reset Library")
        status = gr.Textbox(label="Status")

        # Both buttons report their outcome in the same status box.
        upload_btn.click(upload_files, inputs=file_input, outputs=status)
        reset_btn.click(reset_library, outputs=status)

    with gr.Tab("🤖 Ask AI"):
        question = gr.Textbox(label="Ask a question from your notes")
        ask_btn = gr.Button("Ask")
        answer = gr.Textbox(label="Answer", lines=12)

        ask_btn.click(ask_ai, inputs=question, outputs=answer)

# Launch stays unconditional, as in the original script.
app.launch()