Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from pypdf import PdfReader | |
| from docx import Document | |
| from PIL import Image | |
| from gtts import gTTS | |
| import tempfile | |
| import io | |
| from langchain_community.vectorstores import FAISS | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain.prompts import PromptTemplate | |
| from langchain.llms import HuggingFacePipeline | |
| from transformers import pipeline | |
# ------------------------ Setup HuggingFace LLM -----------------------
# Build a local text-generation pipeline and wrap it so the rest of the
# script can call it through LangChain's LLM interface.
# NOTE(review): this runs at module top level, so presumably the model is
# re-created whenever Streamlit re-executes the script -- confirm whether
# caching is wanted.
text_gen_pipeline = pipeline(
    task="text-generation",
    model="distilgpt2",
    max_new_tokens=150,
    device=-1,  # -1 selects the CPU
)
llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
# -------------------------- Streamlit UI Setup -------------------------
st.set_page_config(page_title="Learning with Fun", layout="centered")

# Page-wide CSS, including the `.title` and `.subtext` classes used by the
# two header <div> elements rendered right after it.
st.markdown(
    """
<style>
body {
background: linear-gradient(to right, #f9f9f9, #e0f7fa);
}
.stApp {
font-family: 'Segoe UI', sans-serif;
}
.title {
text-align: center;
font-size: 36px;
font-weight: bold;
color: #006064;
margin-bottom: 10px;
}
.subtext {
text-align: center;
font-size: 18px;
color: #00796B;
margin-bottom: 30px;
}
</style>
""",
    unsafe_allow_html=True,
)
st.markdown(
    '<div class="title">📘 Learning with Fun</div>',
    unsafe_allow_html=True,
)
st.markdown(
    '<div class="subtext">Ask questions from your syllabus in a fun way!</div>',
    unsafe_allow_html=True,
)

# -------------------------- Sidebar Controls ----------------------------
# Widgets created inside this context are placed in the sidebar.
with st.sidebar:
    grade = st.selectbox("🎓 Select Grade", ["Grade 5", "Grade 6"])
    subject = st.selectbox(
        "📘 Select Subject",
        ["Science", "Math", "Computer", "Islamiyat"],
    )
    # `mode` is later compared against the "📖 Storytelling" label.
    mode = st.radio(
        "🎯 Answer Format",
        ["🧠 Beginner Explanation", "📖 Storytelling"],
    )
    voice_enabled = st.checkbox("🔈 Enable Voice Output", value=True)

# --------------------- File Upload and Text Extraction -------------------
uploaded_file = st.file_uploader(
    "📂 Upload Syllabus File (PDF, DOCX, JPEG, PNG)",
    type=["pdf", "docx", "jpeg", "jpg", "png"],
)
def extract_text(file) -> str:
    """Extract plain text from an uploaded PDF, DOCX, JPEG or PNG file.

    The format is chosen from ``file.type`` (a MIME type string). Problems
    are reported to the user with ``st.error`` instead of being raised, so
    the caller only has to check whether the result is empty.

    Args:
        file: Uploaded file object exposing ``type`` and ``read()``.

    Returns:
        The extracted text with surrounding whitespace stripped; an empty
        string when nothing could be extracted or the format is unsupported.
    """
    text = ""
    if file.type == "application/pdf":
        pages = []
        try:
            # Parse from an in-memory stream: no temporary file is created,
            # so nothing is left behind on disk.
            reader = PdfReader(io.BytesIO(file.read()))
            for page in reader.pages:
                # Pages without extractable text contribute nothing.
                pages.append(page.extract_text() or "")
        except Exception as e:
            st.error(f"Failed to read PDF: {e}")
        # Text from pages read before a failure is still kept.
        text = "".join(pages)
    elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        try:
            doc = Document(io.BytesIO(file.read()))
            text = "".join(para.text + "\n" for para in doc.paragraphs)
        except Exception as e:
            # A corrupt document is reported rather than crashing the app.
            st.error(f"Failed to read DOCX: {e}")
    elif file.type in ["image/jpeg", "image/png"]:
        try:
            # Optional dependency, imported only when an image is uploaded.
            import pytesseract
        except ImportError:
            st.error("Please install pytesseract for image to text conversion.")
        else:
            try:
                image = Image.open(file)
                text = pytesseract.image_to_string(image)
            except Exception as e:
                # Covers unreadable images and OCR failures alike.
                st.error(f"Failed to read image: {e}")
    else:
        st.error("Unsupported file format.")
    return text.strip()
# -------------------- Create Vector Store -------------------------------
def create_vectorstore(text: str) -> FAISS:
    """Build a FAISS vector store from *text*.

    The text is split with ``CharacterTextSplitter`` (``chunk_size=500``,
    ``chunk_overlap=50``) and the resulting documents are embedded with a
    default-constructed ``HuggingFaceEmbeddings``.

    Args:
        text: Full text to index.

    Returns:
        A FAISS vector store containing the embedded chunks.
    """
    chunks = CharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    ).create_documents([text])
    return FAISS.from_documents(chunks, HuggingFaceEmbeddings())
# ------------------------ Prompt Templates ------------------------------
# Both templates are written in Urdu and take the placeholders
# {question} and {context}.

# Rough English gloss: "A student asked: {question} / Syllabus information:
# {context} / Please answer the child in Urdu in the form of an
# interesting story."
_STORY_TEMPLATE = (
    "ایک طالب علم نے سوال کیا: {question}\n"
    "نصاب کی معلومات: {context}\n"
    "برائے مہربانی ایک دلچسپ کہانی کی صورت میں بچے کو اردو میں جواب دیں۔"
)

# Rough English gloss: "Question: {question} / Syllabus context: {context} /
# Please explain it to the child in Urdu in a simple way."
_EXPLAIN_TEMPLATE = (
    "سوال: {question}\n"
    "نصاب کا سیاق و سباق: {context}\n"
    "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
)

story_prompt = PromptTemplate.from_template(_STORY_TEMPLATE)
explain_prompt = PromptTemplate.from_template(_EXPLAIN_TEMPLATE)
# -------------------------- TTS Generator -------------------------------
def generate_voice(text: str, lang: str = 'ur') -> str:
    """Synthesize *text* to an MP3 file with gTTS and return its path.

    Args:
        text: Text to speak.
        lang: Language code passed to gTTS; defaults to ``'ur'``.

    Returns:
        Path of the generated ``.mp3`` file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.
    """
    tts = gTTS(text=text, lang=lang)
    # Use the temp file only to reserve a unique path and close its handle
    # before gTTS writes to that path by name. This avoids leaking an open
    # file descriptor and having two writers on the same file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        audio_path = tmp.name
    tts.save(audio_path)
    return audio_path
# -------------------------- Answer Generator ----------------------------
def get_answer(query: str, vectorstore: FAISS, mode: str) -> str:
    """Answer *query* using passages retrieved from *vectorstore*.

    The retrieved documents' contents are joined with newlines to form the
    context. ``story_prompt`` is used when *mode* equals "📖 Storytelling",
    otherwise ``explain_prompt``; the formatted prompt is sent to ``llm``.

    Args:
        query: The user's question.
        vectorstore: Vector store to retrieve relevant passages from.
        mode: Answer-format label selected in the UI.

    Returns:
        The LLM output with surrounding whitespace stripped.
    """
    matches = vectorstore.as_retriever().get_relevant_documents(query)
    context = "\n".join(match.page_content for match in matches)
    template = story_prompt if mode == "📖 Storytelling" else explain_prompt
    prompt = template.format(question=query, context=context)
    return llm.invoke(prompt).strip()
# ----------------------------- Main Logic -------------------------------
import os  # used only in this section, to delete the temporary audio file

if uploaded_file:
    raw_text = extract_text(uploaded_file)
    if not raw_text:
        st.error("No text extracted from file.")
    else:
        st.success("✅ Syllabus loaded successfully!")
        query = st.text_input("💬 Ask a question (Urdu or English):")
        if query:
            with st.spinner("🤔 Thinking..."):
                # Building the index is the expensive step, so keep it in
                # the session and rebuild only when the extracted text
                # changes (i.e. a different syllabus was uploaded).
                text_key = hash(raw_text)
                if st.session_state.get("vectorstore_key") != text_key:
                    st.session_state["vectorstore"] = create_vectorstore(raw_text)
                    st.session_state["vectorstore_key"] = text_key
                answer = get_answer(query, st.session_state["vectorstore"], mode)
            st.markdown("### ✅ Answer:")
            st.write(answer)
            # Skip speech for an empty answer: there is nothing to speak.
            if voice_enabled and answer:
                audio_path = None
                try:
                    audio_path = generate_voice(answer)
                    with open(audio_path, "rb") as audio_file:
                        audio_bytes = audio_file.read()
                except Exception as e:
                    # Voice is optional: a synthesis failure must not take
                    # down the page after the answer was already shown.
                    st.warning(f"Voice output unavailable: {e}")
                else:
                    # Pass the bytes so playback does not depend on the
                    # temporary file, which is removed below.
                    st.audio(audio_bytes, format="audio/mp3")
                finally:
                    if audio_path is not None and os.path.exists(audio_path):
                        os.remove(audio_path)
else:
    st.info("Please upload your syllabus file to begin.")