"""Learning with Fun: a Streamlit question-answering app for kids.

The app downloads a syllabus file (PDF or DOCX) from a Google Drive link,
indexes its text in a FAISS vector store, retrieves the passages most
relevant to the child's question, and asks a small local language model to
answer in Urdu, optionally reading the answer aloud with gTTS.
"""

import os
import tempfile

import requests
import streamlit as st
from docx import Document
from gtts import gTTS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from PIL import Image
from pypdf import PdfReader
from transformers import pipeline

# Upper bound on how long a Google Drive download may block the UI.
DOWNLOAD_TIMEOUT_SECONDS = 30
# Generation budget counted separately from the (long) retrieval prompt.
MAX_NEW_TOKENS = 200
# st.session_state key remembering the last (link, local path) download.
_DOWNLOAD_STATE_KEY = "syllabus_download"

# Streamlit app config (kept ahead of every other Streamlit call).
st.set_page_config(page_title="Learning with Fun", layout="wide")


@st.cache_resource(show_spinner=False)
def _load_llm() -> HuggingFacePipeline:
    """Build the distilgpt2 text-generation LLM once per server process.

    Streamlit re-executes this script on every widget interaction; caching
    keeps the model from being reloaded each time.
    """
    text_gen_pipeline = pipeline(
        "text-generation",
        model="distilgpt2",
        device=-1,  # CPU only
        # Without this the model's small default max_length is already
        # exceeded by the prompt, leaving no room for an answer.
        max_new_tokens=MAX_NEW_TOKENS,
    )
    return HuggingFacePipeline(pipeline=text_gen_pipeline)


# Setup HuggingFace pipeline with distilgpt2 (CPU)
llm = _load_llm()

st.title("📘 Learning with Fun - Kids QA App")
st.markdown("Ask questions from your syllabus! 📚")

# Sidebar widgets
grade = st.sidebar.selectbox("Select Grade", ["Grade 5", "Grade 6"])
subject = st.sidebar.selectbox(
    "Select Subject", ["Science", "Math", "Computer", "Islamiyat"]
)
mode = st.sidebar.radio(
    "Answer Format", ["🧠 Beginner Explanation", "📖 Storytelling"]
)
voice_enabled = st.sidebar.checkbox("🔈 Enable Voice", value=True)


def _detect_suffix(content: bytes) -> str | None:
    """Return ".pdf" or ".docx" based on the file's magic bytes, else None."""
    # The PDF header is expected near the start of the file, not
    # necessarily at byte 0.
    if b"%PDF-" in content[:1024]:
        return ".pdf"
    # DOCX files are ZIP archives; other ZIP-based formats match too and
    # will be rejected later when python-docx tries to open them.
    if content.startswith(b"PK\x03\x04"):
        return ".docx"
    return None


# Fetch syllabus file from Google Drive link
def fetch_from_gdrive(link: str) -> str | None:
    """Download a shared Google Drive file to a temporary file.

    Args:
        link: A Drive sharing URL containing either ``id=<file id>`` or
            ``/d/<file id>/``.

    Returns:
        Path of the downloaded temporary file, carrying a ``.pdf`` or
        ``.docx`` suffix so that ``extract_text`` can recognise it, or
        ``None`` when the link has no file id, the download fails, or the
        content is neither a PDF nor a ZIP/DOCX (for example an HTML
        confirmation page).
    """
    file_id = None
    if "id=" in link:
        file_id = link.split("id=")[1].split("&")[0]
    elif "/d/" in link:
        file_id = link.split("/d/")[1].split("/")[0]
    if not file_id:
        return None

    try:
        response = requests.get(
            "https://drive.google.com/uc",
            params={"export": "download", "id": file_id},
            timeout=DOWNLOAD_TIMEOUT_SECONDS,
        )
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None

    suffix = _detect_suffix(response.content)
    if suffix is None:
        return None

    # delete=False: the file must outlive this function so it can be parsed.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
        tmp_file.write(response.content)
    return tmp_file.name


uploaded_file = None
file_link = st.text_input(
    "Paste Google Drive Link to Syllabus File (.pdf or .docx)"
)
if file_link:
    # Reuse the previous download for the same link instead of fetching
    # (and leaking a new temp file) on every script rerun.
    previous = st.session_state.get(_DOWNLOAD_STATE_KEY)
    if previous and previous[0] == file_link and os.path.exists(previous[1]):
        uploaded_file = previous[1]
    else:
        filepath = fetch_from_gdrive(file_link)
        if filepath:
            st.session_state[_DOWNLOAD_STATE_KEY] = (file_link, filepath)
            uploaded_file = filepath
        else:
            st.error("Invalid Google Drive link or download error.")


# Extract text content from uploaded file
def extract_text(file_path: str) -> str:
    """Return the plain text of a ``.pdf`` or ``.docx`` file.

    Args:
        file_path: Path whose extension (case-insensitive) selects the parser.

    Returns:
        The extracted text; an empty string for unsupported extensions, in
        which case an error is also shown in the Streamlit UI.
    """
    lowered = file_path.lower()
    if lowered.endswith(".pdf"):
        reader = PdfReader(file_path)
        page_texts = (page.extract_text() for page in reader.pages)
        # Separate pages with a newline so words at page boundaries do not
        # fuse and the text splitter has a separator to cut on.
        return "\n".join(page_text for page_text in page_texts if page_text)
    if lowered.endswith(".docx"):
        doc = Document(file_path)
        return "".join(para.text + "\n" for para in doc.paragraphs)
    st.error("Unsupported file format. Please upload a PDF or DOCX file.")
    return ""


# Create vector store for similarity search
@st.cache_resource(show_spinner=False)
def create_vectorstore(text: str) -> FAISS:
    """Split *text* into overlapping chunks and index them in FAISS.

    Cached on *text* so the embedding model and index are built once per
    document rather than once per question.
    """
    # The extracted text contains single newlines only; the splitter's
    # default "\n\n" separator would leave the document as one huge chunk.
    splitter = CharacterTextSplitter(
        separator="\n", chunk_size=500, chunk_overlap=50
    )
    docs = splitter.create_documents([text])
    embeddings = HuggingFaceEmbeddings()
    return FAISS.from_documents(docs, embeddings)


# Prompt templates
story_prompt = PromptTemplate.from_template(
    "ایک طالب علم نے سوال کیا: {question}\n"
    "نصاب کی معلومات: {context}\n"
    "برائے مہربانی ایک دلچسپ کہانی کی صورت میں بچے کو اردو میں جواب دیں۔"
)
explain_prompt = PromptTemplate.from_template(
    "سوال: {question}\n"
    "نصاب کا سیاق و سباق: {context}\n"
    "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
)


# Generate speech audio from text
def generate_voice(text: str, lang: str = "ur") -> str:
    """Synthesise *text* with gTTS and return the path of the mp3 file.

    Args:
        text: Text to speak.
        lang: gTTS language code; defaults to Urdu.

    Returns:
        Path of a temporary ``.mp3`` file; the caller is responsible for
        deleting it.
    """
    tts = gTTS(text=text, lang=lang)
    # Reserve a unique path, then close the handle before gTTS writes to it
    # (an open handle leaks and blocks the write on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tts_file:
        audio_path = tts_file.name
    tts.save(audio_path)
    return audio_path


# Generate answer using vectorstore context and LLM
def get_answer(query: str, vectorstore: FAISS, mode: str) -> str:
    """Answer *query* from the syllabus passages most similar to it.

    Args:
        query: The child's question.
        vectorstore: Index built by ``create_vectorstore``.
        mode: The selected "Answer Format" label; the storytelling label
            selects the story prompt, anything else the explanation prompt.

    Returns:
        The text produced by the language model.
    """
    retriever = vectorstore.as_retriever()
    docs = retriever.get_relevant_documents(query)
    context = "\n".join(doc.page_content for doc in docs)
    template = story_prompt if mode == "📖 Storytelling" else explain_prompt
    prompt = template.format(question=query, context=context)
    return llm.invoke(prompt)


# Main app flow
if uploaded_file:
    raw_text = extract_text(uploaded_file)
    if not raw_text.strip():
        st.error("No text extracted from the file. Please check the file content.")
    else:
        st.success("📄 Syllabus loaded successfully!")
        query = st.text_input("❓ Ask your question (Urdu or English)")
        if query:
            with st.spinner("Thinking..."):
                vectorstore = create_vectorstore(raw_text)
                answer = get_answer(query, vectorstore, mode)
            st.markdown("### ✅ Answer:")
            st.write(answer)
            # gTTS rejects empty input, so only speak non-blank answers.
            if voice_enabled and answer.strip():
                audio_file = generate_voice(answer)
                try:
                    with open(audio_file, "rb") as audio:
                        st.audio(audio.read(), format="audio/mp3")
                finally:
                    # The bytes are already handed to Streamlit; drop the file.
                    os.remove(audio_file)
else:
    st.info("Please paste a Google Drive link to your syllabus file (.pdf or .docx) above.")