import os

import gradio as gr
import requests
from pypdf import PdfReader
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ==============================
# GROQ API SETUP
# ==============================
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
API_URL = "https://api.groq.com/openai/v1/chat/completions"
# Model id sent with every request. The default is the id this file has always
# used; set the GROQ_MODEL environment variable to switch without a code edit.
# NOTE(review): confirm the default id is still served by the Groq API.
GROQ_MODEL = os.environ.get("GROQ_MODEL", "llama3-8b-8192")

# ==============================
# MEMORY STORAGE
# ==============================
# `documents` holds every text chunk in the library, `vectorizer` is the TF-IDF
# model fitted on those chunks, and `doc_vectors` is the matrix it produced
# (row i corresponds to documents[i]). The three are always replaced together.
documents = []
vectorizer = TfidfVectorizer(stop_words="english")
doc_vectors = None


# ==============================
# SAFE PDF READING (HF Compatible)
# ==============================
def extract_text_from_pdf(file_obj):
    """Return the extractable text of a PDF, one page after another.

    Args:
        file_obj: Whatever ``PdfReader`` accepts as its source.

    Returns:
        The text of all pages joined by newlines. Pages for which
        ``extract_text()`` yields nothing are skipped, so the result is ``""``
        when no page has extractable text.
    """
    reader = PdfReader(file_obj)
    page_texts = (page.extract_text() for page in reader.pages)
    # Join with a newline so the last word of one page and the first word of
    # the next do not fuse into a single token.
    return "\n".join(text for text in page_texts if text)


# ==============================
# TEXT CHUNKING
# ==============================
def chunk_text(text, chunk_size=400):
    """Split *text* into chunks of at most *chunk_size* whitespace-separated words.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be positive.

    Returns:
        A list of strings, each the words of one chunk joined by single
        spaces. Empty or whitespace-only text gives an empty list.

    Raises:
        ValueError: If *chunk_size* is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]


# ==============================
# PROCESS MULTIPLE FILES
# ==============================
def upload_files(files):
    """Add the text of the uploaded PDFs to the library and rebuild the index.

    The new chunks and the refitted TF-IDF model are committed to the module
    globals only after every file has been read and the fit has succeeded, so
    a failure part-way through leaves the existing library exactly as it was.

    Args:
        files: The uploaded files; each item is passed to
            ``extract_text_from_pdf``. A falsy value means nothing was chosen.

    Returns:
        A status message for the UI (success, warning, or error text).
    """
    global documents, doc_vectors, vectorizer

    if not files:
        return "⚠️ Please upload files."

    try:
        new_chunks = []
        for file in files:
            text = extract_text_from_pdf(file)
            if not text.strip():
                continue
            new_chunks.extend(chunk_text(text))

        combined = documents + new_chunks
        if not combined:
            return "❌ No readable text found in PDFs."

        # Fit a fresh vectorizer rather than the shared one, so that a failed
        # fit cannot disturb the index that is currently in use.
        new_vectorizer = TfidfVectorizer(stop_words="english")
        new_vectors = new_vectorizer.fit_transform(combined)
    except Exception as e:
        # UI boundary: report the problem as a status message instead of
        # letting the handler raise.
        return f"❌ Error while processing files: {str(e)}"

    documents = combined
    vectorizer = new_vectorizer
    doc_vectors = new_vectors
    return f"✅ Files processed successfully! Added {len(new_chunks)} study sections."


# ==============================
# SEARCH CONTEXT
# ==============================
def retrieve_context(question, top_k=3):
    """Return the library chunks most similar to *question*.

    Args:
        question: The query text.
        top_k: Maximum number of chunks to return.

    Returns:
        ``None`` when the library is empty; otherwise the selected chunks,
        most similar first, joined by blank lines. A non-positive *top_k*
        selects no chunks and gives ``""``.
    """
    if doc_vectors is None or not documents:
        return None
    if top_k <= 0:
        # A slice of [-0:] would return every chunk rather than none.
        return ""

    q_vec = vectorizer.transform([question])
    similarity = cosine_similarity(q_vec, doc_vectors).flatten()
    # argsort is ascending: take the last top_k entries, then reverse them.
    top_indices = similarity.argsort()[-top_k:][::-1]
    return "\n\n".join(documents[i] for i in top_indices)


# ==============================
# GROQ CALL (WITH ERROR HANDLING)
# ==============================
def ask_ai(question):
    """Answer *question* from the uploaded material via the Groq chat API.

    Args:
        question: The user's question.

    Returns:
        The model's reply, or a message describing why no answer could be
        produced (missing API key, blank question, empty library, network
        failure, non-200 response, or a response body of unexpected shape).
    """
    if not GROQ_API_KEY:
        return "❌ GROQ_API_KEY not set in Space Secrets."

    if not question or not question.strip():
        # A blank query matches nothing, so retrieval would pick arbitrary
        # chunks; ask for input instead.
        return "⚠️ Please enter a question."

    context = retrieve_context(question)
    if context is None:
        return "⚠️ Upload study material first."

    prompt = f"""
Answer ONLY using this study material.

Material:
{context}

Question:
{question}
"""

    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": GROQ_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.2,
    }

    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    except requests.RequestException as e:
        return f"❌ Connection Error: {str(e)}"

    if response.status_code != 200:
        return f"❌ Groq API Error: {response.text}"

    try:
        return response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # The request itself succeeded; the body was not valid JSON or lacked
        # the expected fields, so do not report it as a connection problem.
        return f"❌ Unexpected Groq API response: {e!r}"


# ==============================
# RESET LIBRARY
# ==============================
def reset_library():
    """Discard every stored chunk and the index built from them.

    Returns:
        A status message for the UI.
    """
    global documents, doc_vectors
    documents = []
    doc_vectors = None
    return "🗑 Library cleared."
# ==============================
# UI
# ==============================
# Two-tab interface: one tab to build the library from PDFs, one to query it.
with gr.Blocks(title="AI StudyHub") as app:
    gr.Markdown("# 🎓 AI StudyHub")
    gr.Markdown("Upload books → Ask questions → AI learns from YOUR material.")

    # --- Layout -------------------------------------------------------------
    with gr.Tab("📚 Upload Study Material"):
        file_input = gr.File(file_types=[".pdf"], file_count="multiple")
        upload_btn = gr.Button("Process Files")
        reset_btn = gr.Button("Reset Library")
        status = gr.Textbox(label="Status")

    with gr.Tab("🤖 Ask AI"):
        question = gr.Textbox(label="Ask a question from your notes")
        ask_btn = gr.Button("Ask")
        answer = gr.Textbox(label="Answer", lines=12)

    # --- Event wiring -------------------------------------------------------
    # Listeners are registered after the layout, still inside the Blocks
    # context. Both library buttons report into the shared status box.
    upload_btn.click(fn=upload_files, inputs=file_input, outputs=status)
    reset_btn.click(fn=reset_library, outputs=status)
    ask_btn.click(fn=ask_ai, inputs=question, outputs=answer)

app.launch()