# learning_with_fun_app.py
"""Learning with Fun — a Streamlit RAG app for kids.

Uploads textbook files (PDF/DOCX/images), builds a FAISS vector index over
their text, retrieves context for a user question, asks the GROQ LLaMA-3 API
for a bilingual (English + Urdu) answer, and plays an Urdu audio rendition.
"""
import base64
import os
import shutil
import tempfile

import requests
import streamlit as st
from gtts import gTTS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import (
    Docx2txtLoader,
    PyMuPDFLoader,
    UnstructuredImageLoader,
)
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document  # noqa: F401  (kept: may be used by callers/notebooks importing this module)

# ----------------------------- UI SETUP --------------------------------------
st.set_page_config(page_title="Learning with Fun", layout="wide")
st.title("📚 Learning with Fun - Educational Q&A for Kids")

# ----------------------------- USER INPUT -----------------------------------
grade = st.selectbox("Select your Grade", ["Grade 5", "Grade 6"])
subject = st.selectbox("Select Subject", ["Science", "Math", "English"])
uploaded_files = st.file_uploader(
    "Upload textbook files (PDF, DOCX, JPEG)",
    type=["pdf", "docx", "jpg", "jpeg"],
    accept_multiple_files=True,
)
question = st.text_input("Ask your question in English or Urdu")
groq_api_key = st.text_input("🔐 Enter your GROQ API Key", type="password")

# ------------------------- SETUP TEMP FOLDER -------------------------------
# A fresh scratch directory per Streamlit rerun; removed again at the bottom
# of the script, after all files written here have been read back.
temp_dir = tempfile.mkdtemp()


# ------------------------- UTILITY FUNCTIONS -------------------------------
def load_documents(uploaded_files):
    """Load various file types into LangChain Document format.

    Each uploaded file is first persisted to ``temp_dir`` because the
    LangChain loaders operate on filesystem paths, not in-memory buffers.
    Unsupported extensions are silently skipped.
    """
    docs = []
    for file in uploaded_files:
        ext = file.name.split(".")[-1].lower()
        path = os.path.join(temp_dir, file.name)
        with open(path, "wb") as f:
            f.write(file.read())
        if ext == "pdf":
            loader = PyMuPDFLoader(path)
        elif ext == "docx":
            loader = Docx2txtLoader(path)
        elif ext in ["jpg", "jpeg"]:
            loader = UnstructuredImageLoader(path)
        else:
            continue
        docs.extend(loader.load())
    return docs


def split_documents(documents):
    """Split documents into smaller chunks suitable for embedding."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    return splitter.split_documents(documents)


def create_vector_store(chunks):
    """Create a FAISS vector DB from text chunks."""
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(chunks, embeddings)


def retrieve_docs(query, vector_store):
    """Return the 3 chunks most similar to *query* from the FAISS store."""
    return vector_store.similarity_search(query, k=3)


def query_llm_groq(context, query, groq_api_key):
    """Query the GROQ LLaMA-3 chat API and return the raw answer text.

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-2xx HTTP response (callers should surface this to the user).
    """
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {groq_api_key}",
        "Content-Type": "application/json",
    }
    prompt = f"""
Context: {context}

Question: {query}

Provide two outputs:
1. A simple, educational explanation in English + Urdu.
2. A creative storytelling version mixing English and Urdu.
"""
    data = {
        "model": "llama3-8b-8192",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
    }
    # BUGFIX: a request without a timeout can hang the Streamlit session
    # indefinitely if the API stalls.
    response = requests.post(url, headers=headers, json=data, timeout=60)
    response.raise_for_status()
    result = response.json()
    return result["choices"][0]["message"]["content"]


def generate_audio(text, lang='ur'):
    """Convert *text* to speech with gTTS and return an embeddable HTML player.

    The MP3 is inlined as a base64 data URI so no static file hosting is
    needed. NOTE(review): the text passed in mixes English and Urdu while
    lang='ur' — pronunciation of the English parts depends on the gTTS
    Urdu voice; confirm this sounds acceptable.
    """
    tts = gTTS(text, lang=lang)
    audio_path = os.path.join(temp_dir, "response.mp3")
    tts.save(audio_path)
    with open(audio_path, "rb") as audio_file:
        audio_bytes = audio_file.read()
    b64 = base64.b64encode(audio_bytes).decode()
    # BUGFIX: the original returned an empty f-string, so no audio player was
    # ever rendered. Reconstructed the <audio> element with the base64 source.
    audio_html = (
        f'<audio controls src="data:audio/mpeg;base64,{b64}"></audio>'
    )
    return audio_html


# ----------------------------- MAIN LOGIC ----------------------------------
if question and uploaded_files and groq_api_key:
    with st.spinner("Processing your documents..."):
        documents = load_documents(uploaded_files)
        chunks = split_documents(documents)
        vector_db = create_vector_store(chunks)
        results = retrieve_docs(question, vector_db)
        context_text = "\n".join([doc.page_content for doc in results])
        try:
            answer = query_llm_groq(context_text, question, groq_api_key)
        except requests.RequestException as err:
            # Surface API/network failures to the user instead of a traceback.
            st.error(f"GROQ API request failed: {err}")
            answer = None

    if answer is not None:
        st.markdown("### 📘 Answer")
        # BUGFIX: split at most once — the answer may legitimately contain
        # "2." again (e.g. "2.5" or a nested list), which previously made
        # len(parts) != 2 and silently skipped the formatted output + audio.
        parts = answer.split("2.", 1)
        if len(parts) == 2:
            st.markdown(f"**Explanation:**\n{parts[0]}")
            st.markdown(f"**Storytelling:**\n{parts[1]}")
            st.markdown(generate_audio(parts[1]), unsafe_allow_html=True)
        else:
            st.markdown(answer)

# ----------------------------- CLEANUP --------------------------------------
# Remove the per-run scratch directory; all contents (uploaded copies, MP3)
# have already been read into memory by this point.
if os.path.exists(temp_dir):
    shutil.rmtree(temp_dir)