Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| import requests | |
| from pypdf import PdfReader | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
# ==============================
# Groq API configuration
# ==============================
# API key is read from the environment (HF Space "Secrets"); may be None
# until configured — ask_ai() checks for that at call time.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
API_URL = "https://api.groq.com/openai/v1/chat/completions"

# ==============================
# In-memory document store
# ==============================
documents = []  # accumulated text chunks from every uploaded PDF
vectorizer = TfidfVectorizer(stop_words="english")
doc_vectors = None  # TF-IDF matrix over `documents`; None until first fit
# ==============================
# SAFE PDF READING (HF Compatible)
# ==============================
def extract_text_from_pdf(file_obj):
    """Return the concatenated extractable text of every page in a PDF.

    Pages whose extraction yields nothing (e.g. scanned images) are
    skipped, so the result may be an empty string.
    """
    reader = PdfReader(file_obj)
    pieces = []
    for page in reader.pages:
        extracted = page.extract_text()
        if extracted:
            pieces.append(extracted)
    return "".join(pieces)
# ==============================
# TEXT CHUNKING
# ==============================
def chunk_text(text, chunk_size=400):
    """Split *text* into chunks of at most *chunk_size* whitespace-separated words.

    Returns a list of strings (empty list for blank input); words are
    re-joined with single spaces, so original whitespace is normalized.
    """
    words = text.split()
    chunks = []
    for start in range(0, len(words), chunk_size):
        chunks.append(" ".join(words[start:start + chunk_size]))
    return chunks
# ==============================
# PROCESS MULTIPLE FILES
# ==============================
def upload_files(files):
    """Ingest uploaded PDF files into the in-memory library.

    Extracts text from each file, splits it into chunks, appends the
    chunks to the module-level `documents` list, and refits the TF-IDF
    matrix. Returns a user-facing status string (never raises).
    """
    global documents, doc_vectors
    if not files:
        return "β οΈ Please upload files."
    added_chunks = 0
    try:
        for file in files:
            text = extract_text_from_pdf(file)
            # Skip PDFs with no extractable text (e.g. pure scans).
            if not text.strip():
                continue
            chunks = chunk_text(text)
            documents.extend(chunks)
            added_chunks += len(chunks)
        # Bug fix: previously only the empty-library case was reported.
        # If the library already held documents but THIS batch produced
        # nothing, the old code refit pointlessly and claimed success
        # with "Added 0 study sections".
        if added_chunks == 0:
            return "β No readable text found in PDFs."
        doc_vectors = vectorizer.fit_transform(documents)
        return f"β Files processed successfully! Added {added_chunks} study sections."
    except Exception as e:
        # Best-effort: surface any pypdf/sklearn failure to the UI
        # instead of crashing the Gradio callback.
        return f"β Error while processing files: {str(e)}"
# ==============================
# SEARCH CONTEXT
# ==============================
def retrieve_context(question, top_k=3):
    """Return the *top_k* stored chunks most similar to *question*.

    Chunks are ranked by TF-IDF cosine similarity and joined with blank
    lines. Returns None when no material has been indexed yet.
    """
    global doc_vectors
    if doc_vectors is None or not documents:
        return None
    question_vec = vectorizer.transform([question])
    scores = cosine_similarity(question_vec, doc_vectors).flatten()
    # argsort ascending -> take the last k indices, best first.
    best_indices = scores.argsort()[-top_k:][::-1]
    return "\n\n".join(documents[i] for i in best_indices)
# ==============================
# GROQ CALL (WITH ERROR HANDLING)
# ==============================
def ask_ai(question):
    """Answer *question* using only the uploaded study material.

    Builds a context-restricted prompt from the retrieved chunks and
    sends it to the Groq chat-completions API. Always returns a string
    (either the model's answer or a user-facing error message).
    """
    if not GROQ_API_KEY:
        return "β GROQ_API_KEY not set in Space Secrets."
    context = retrieve_context(question)
    if context is None:
        return "β οΈ Upload study material first."
    prompt = f"""
Answer ONLY using this study material.
Material:
{context}
Question:
{question}
"""
    request_kwargs = {
        "headers": {
            "Authorization": f"Bearer {GROQ_API_KEY}",
            "Content-Type": "application/json",
        },
        "json": {
            "model": "llama3-8b-8192",
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.2,
        },
        "timeout": 60,
    }
    try:
        response = requests.post(API_URL, **request_kwargs)
        if response.status_code != 200:
            return f"β Groq API Error: {response.text}"
        # Any malformed/unexpected payload falls through to the handler.
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        return f"β Connection Error: {str(e)}"
# ==============================
# RESET LIBRARY
# ==============================
def reset_library():
    """Drop every stored chunk and the fitted TF-IDF matrix; return a status string."""
    global documents, doc_vectors
    documents, doc_vectors = [], None
    return "π Library cleared."
# ==============================
# UI
# ==============================
with gr.Blocks(title="AI StudyHub") as app:
    gr.Markdown("# π AI StudyHub")
    gr.Markdown("Upload books β Ask questions β AI learns from YOUR material.")

    # Tab 1: ingest PDFs into the in-memory library.
    with gr.Tab("π Upload Study Material"):
        pdf_picker = gr.File(file_types=[".pdf"], file_count="multiple")
        process_button = gr.Button("Process Files")
        clear_button = gr.Button("Reset Library")
        status_box = gr.Textbox(label="Status")
        process_button.click(upload_files, inputs=pdf_picker, outputs=status_box)
        clear_button.click(reset_library, outputs=status_box)

    # Tab 2: question-answering over the uploaded material.
    with gr.Tab("π€ Ask AI"):
        question_box = gr.Textbox(label="Ask a question from your notes")
        ask_button = gr.Button("Ask")
        answer_box = gr.Textbox(label="Answer", lines=12)
        ask_button.click(ask_ai, inputs=question_box, outputs=answer_box)

app.launch()