Upload 5 files
- README.md +28 -14
- main.py +30 -0
- ocr.py +8 -0
- rag_model.py +15 -0
- requirements.txt +4 -0
README.md
CHANGED
@@ -1,14 +1,28 @@

# StudyMate: AI-Powered PDF-Based Q&A System

StudyMate allows students to upload a PDF and ask questions using a powerful RAG-based AI model.

### Features
- PDF Upload & Parsing
- OCR and Content Extraction
- RAG-based Q&A using Hugging Face
- Built with Streamlit, deployable on Hugging Face Spaces

### How to Run

```bash
pip install -r requirements.txt
streamlit run main.py
```

### Folder Structure

```
studymate/
├── main.py
├── requirements.txt
├── README.md
└── backend/
    ├── ocr.py
    └── rag_model.py
```
main.py
ADDED
@@ -0,0 +1,30 @@

```python
import streamlit as st
from backend.ocr import extract_text_from_pdf
from backend.rag_model import setup_retriever_and_qa, get_answer

st.set_page_config(page_title="StudyMate - PDF Q&A", layout="wide")
st.title("StudyMate: AI-Powered PDF-Based Q&A System")

# Initialize retriever and RAG model
retriever, rag_pipeline = setup_retriever_and_qa()

uploaded_file = st.file_uploader("Upload your study material (PDF)", type="pdf")

if uploaded_file:
    with st.spinner("Extracting content from PDF..."):
        full_text = extract_text_from_pdf(uploaded_file)
        st.success("PDF content extracted!")

    if full_text:
        st.text_area("Extracted Text Preview", full_text[:2000], height=300)

        query = st.text_input("Ask a question based on the PDF")

        if query:
            with st.spinner("Thinking..."):
                answer = get_answer(full_text, query, retriever, rag_pipeline)
                st.markdown(f"**Answer:** {answer}")
    else:
        st.error("Failed to extract text from the PDF.")
else:
    st.info("Please upload a PDF to get started.")
```
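Note that Streamlit reruns the whole script on every interaction, so `setup_retriever_and_qa()` is executed on each rerun. A minimal optional sketch of memoizing that setup with Streamlit's `st.cache_resource` (assuming a recent Streamlit release; `load_qa_components` is a hypothetical wrapper, not part of this commit):

```python
# Optional sketch: cache the heavy retriever/model setup across Streamlit reruns.
# st.cache_resource keeps the returned objects alive for the process lifetime.
import streamlit as st
from backend.rag_model import setup_retriever_and_qa

@st.cache_resource
def load_qa_components():
    # Runs once; later reruns reuse the cached (retriever, pipeline) pair.
    return setup_retriever_and_qa()

retriever, rag_pipeline = load_qa_components()
```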
ocr.py
ADDED
@@ -0,0 +1,8 @@

```python
import fitz  # PyMuPDF

def extract_text_from_pdf(file) -> str:
    doc = fitz.open(stream=file.read(), filetype="pdf")
    text = ""
    for page in doc:
        text += page.get_text()
    return text.strip()
```
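For a quick local check of the extractor outside Streamlit, any object with a `.read()` method that returns PDF bytes will do, since `fitz.open(stream=..., filetype="pdf")` accepts raw bytes. A minimal sketch (the `sample.pdf` path is a placeholder):

```python
# Sketch: run the extractor against a local PDF; the path is a placeholder.
from backend.ocr import extract_text_from_pdf

with open("sample.pdf", "rb") as f:
    text = extract_text_from_pdf(f)

print(text[:500])  # preview the first 500 extracted characters
```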
rag_model.py
ADDED
@@ -0,0 +1,15 @@

```python
from transformers import RagTokenizer, RagRetriever, RagTokenForGeneration
from transformers import pipeline
import torch

def setup_retriever_and_qa():
    tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-base")
    retriever = RagRetriever.from_pretrained("facebook/rag-token-base", index_name="exact", use_dummy_dataset=True)
    rag_model = RagTokenForGeneration.from_pretrained("facebook/rag-token-base")
    qa_pipeline = pipeline("text2text-generation", model=rag_model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
    return retriever, qa_pipeline

def get_answer(context: str, question: str, retriever, qa_pipeline):
    input_text = f"question: {question} context: {context}"
    result = qa_pipeline(input_text, max_length=200, do_sample=True)
    return result[0]['generated_text']
```
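For reference: `get_answer` above feeds the question and context through the `text2text-generation` pipeline as a single string and never consults the returned `retriever`. The retriever-backed path documented for these RAG classes instead attaches the retriever to the model and calls `generate`. A minimal sketch following the transformers RAG documentation (the `facebook/rag-token-nq` checkpoint and the question are the documented example values, not from this commit):

```python
# Sketch of retriever-backed RAG generation per the transformers docs
# (checkpoint and question are illustrative, not part of this repo).
from transformers import RagTokenizer, RagRetriever, RagTokenForGeneration

tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-nq")
retriever = RagRetriever.from_pretrained(
    "facebook/rag-token-nq", index_name="exact", use_dummy_dataset=True
)
model = RagTokenForGeneration.from_pretrained("facebook/rag-token-nq", retriever=retriever)

inputs = tokenizer("who holds the record in 100m freestyle", return_tensors="pt")
generated = model.generate(input_ids=inputs["input_ids"])
print(tokenizer.batch_decode(generated, skip_special_tokens=True)[0])
```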
requirements.txt
ADDED
@@ -0,0 +1,4 @@

```
streamlit
transformers
torch
pymupdf
```