Spaces:
Runtime error
Runtime error
| # learning_with_fun_app.py | |
| import os | |
| import tempfile | |
| import streamlit as st | |
| import requests | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.document_loaders import PyMuPDFLoader, Docx2txtLoader, UnstructuredImageLoader | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_core.documents import Document | |
| from gtts import gTTS | |
| import base64 | |
| import shutil | |
| # ----------------------------- UI SETUP -------------------------------------- | |
# Page chrome: wide layout plus the app title.
# NOTE(review): the leading "π" in the title and in the API-key label below
# looks like a mis-decoded emoji — confirm the intended character.
st.set_page_config(
    page_title="Learning with Fun",
    layout="wide",
)
st.title(body="π Learning with Fun - Educational Q&A for Kids")

# ----------------------------- USER INPUT -----------------------------------
# Widgets are created in display order: grade, subject, uploads, question, key.
grade = st.selectbox(
    label="Select your Grade",
    options=["Grade 5", "Grade 6"],
)
subject = st.selectbox(
    label="Select Subject",
    options=["Science", "Math", "English"],
)
uploaded_files = st.file_uploader(
    label="Upload textbook files (PDF, DOCX, JPEG)",
    type=["pdf", "docx", "jpg", "jpeg"],
    accept_multiple_files=True,
)
question = st.text_input(label="Ask your question in English or Urdu")
groq_api_key = st.text_input(
    label="π Enter your GROQ API Key",
    type="password",
)

# ------------------------- SETUP TEMP FOLDER -------------------------------
# Fresh scratch directory for this script run; uploaded files are written here.
temp_dir = tempfile.mkdtemp()
| # ------------------------- UTILITY FUNCTIONS ------------------------------- | |
def load_documents(uploaded_files):
    """Load uploaded files into LangChain documents.

    Each supported upload is written into the module-level ``temp_dir`` and
    parsed with the loader matching its extension: ``.pdf`` uses
    ``PyMuPDFLoader``, ``.docx`` uses ``Docx2txtLoader`` and ``.jpg``/``.jpeg``
    use ``UnstructuredImageLoader``. Uploads with any other extension are
    skipped and never written to disk.

    Args:
        uploaded_files: Iterable of file-like uploads exposing ``name``,
            ``seek()`` and ``read()``.

    Returns:
        A flat list containing the documents produced by every loader.
    """
    docs = []
    for file in uploaded_files:
        # Keep only the final path component so an upload named like
        # "../../x.pdf" cannot be written outside temp_dir.
        safe_name = os.path.basename(file.name)
        ext = os.path.splitext(safe_name)[1].lower()

        # Pick the loader before touching the disk so unsupported files are
        # skipped without leaving a copy behind.
        if ext == ".pdf":
            loader_cls = PyMuPDFLoader
        elif ext == ".docx":
            loader_cls = Docx2txtLoader
        elif ext in (".jpg", ".jpeg"):
            loader_cls = UnstructuredImageLoader
        else:
            continue

        path = os.path.join(temp_dir, safe_name)
        # Rewind first: a file-like object that was already read once would
        # otherwise hand back empty bytes.
        file.seek(0)
        with open(path, "wb") as f:
            f.write(file.read())

        docs.extend(loader_cls(path).load())
    return docs
def split_documents(documents, chunk_size=500, chunk_overlap=50):
    """Split documents into smaller, overlapping chunks.

    Args:
        documents: List of LangChain documents to split.
        chunk_size: Chunk size passed to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Overlap between consecutive chunks, passed to the
            same splitter.

    Returns:
        The list of chunks returned by the splitter, or an empty list when
        there is nothing to split.
    """
    # Nothing to split: skip building the splitter altogether.
    if not documents:
        return []
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)
def create_vector_store(chunks, model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Create a FAISS vector store from document chunks.

    Args:
        chunks: Non-empty list of LangChain documents to embed.
        model_name: Model identifier handed to ``HuggingFaceEmbeddings``.

    Returns:
        The store returned by ``FAISS.from_documents``.

    Raises:
        ValueError: If ``chunks`` is empty.
    """
    # Fail early with a clear message, before the embedding model is loaded.
    # NOTE(review): an empty list is expected to fail inside FAISS index
    # construction with a far less readable error — confirm against the
    # installed langchain_community version.
    if not chunks:
        raise ValueError("Cannot build a vector store from an empty list of chunks.")
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_documents(chunks, embeddings)
def retrieve_docs(query, vector_store, k=3):
    """Return the chunks most similar to ``query``.

    Args:
        query: Text to search for.
        vector_store: Object exposing ``similarity_search(query, k=...)``,
            such as the store built by ``create_vector_store``.
        k: Number of results to request (defaults to 3).

    Returns:
        Whatever ``vector_store.similarity_search`` returns for ``query``.
    """
    return vector_store.similarity_search(query, k=k)
def query_llm_groq(
    context,
    query,
    groq_api_key,
    *,
    model="llama3-8b-8192",
    temperature=0.7,
    timeout=60,
):
    """Ask the GROQ chat-completions endpoint to answer ``query``.

    The prompt embeds the retrieved ``context`` and the question, and asks
    for two outputs: a simple explanation and a storytelling version, both
    mixing English and Urdu.

    Args:
        context: Text retrieved from the documents, inserted into the prompt.
        query: The user's question.
        groq_api_key: API key sent as a bearer token.
        model: Model name placed in the request payload.
        temperature: Sampling temperature placed in the request payload.
        timeout: Seconds passed to ``requests.post`` as its timeout.

    Returns:
        The ``content`` string of the first choice in the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: For connection problems and timeouts.
    """
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {groq_api_key}",
        "Content-Type": "application/json",
    }
    # Built line by line so the prompt text does not depend on how this
    # function body happens to be indented.
    prompt = (
        "\n"
        "Context:\n"
        f"{context}\n"
        "Question:\n"
        f"{query}\n"
        "Provide two outputs:\n"
        "1. A simple, educational explanation in English + Urdu.\n"
        "2. A creative storytelling version mixing English and Urdu.\n"
    )
    data = {
        "model": model,
        "messages": [
            {"role": "user", "content": prompt},
        ],
        "temperature": temperature,
    }
    # Without a timeout a stalled connection would block the app indefinitely.
    response = requests.post(url, headers=headers, json=data, timeout=timeout)
    response.raise_for_status()
    result = response.json()
    return result["choices"][0]["message"]["content"]
def generate_audio(text, lang='ur'):
    """Convert text to speech with gTTS and return an HTML audio player.

    The MP3 data is produced in memory and embedded in the markup as a
    base64 ``data:`` URI, so no file is written to disk.

    Args:
        text: Text to speak.
        lang: Language code handed to gTTS (defaults to ``'ur'``).

    Returns:
        An ``<audio autoplay controls>`` HTML snippet, or an empty string
        when ``text`` is empty or only whitespace.
    """
    # Local import: the file's import block does not bring in ``io``.
    import io

    # Nothing to speak: return no markup instead of passing blank text to
    # gTTS (which is expected to reject it — TODO confirm).
    if not text or not text.strip():
        return ""

    buffer = io.BytesIO()
    gTTS(text, lang=lang).write_to_fp(buffer)
    b64 = base64.b64encode(buffer.getvalue()).decode()
    audio_html = f'<audio autoplay controls><source src="data:audio/mp3;base64,{b64}" type="audio/mp3"></audio>'
    return audio_html
| # ----------------------------- MAIN LOGIC ---------------------------------- | |
import re  # used only by _split_answer below


def _split_answer(answer):
    """Split the model reply at the first line that begins output "2.".

    Only a "2." at the start of a line (optionally preceded by markdown
    markers such as ``**`` or ``#``) counts, so text like "2.5" inside the
    explanation does not break the split.

    Returns:
        ``[explanation, story]`` when the marker is found, otherwise ``None``.
    """
    pieces = re.split(r"^[ \t>*#_-]*2\.", answer, maxsplit=1, flags=re.MULTILINE)
    return pieces if len(pieces) == 2 else None


def _answer_question(question, uploaded_files, groq_api_key):
    """Run the load -> split -> embed -> retrieve -> ask pipeline and render it."""
    with st.spinner("Processing your documents..."):
        documents = load_documents(uploaded_files)
        chunks = split_documents(documents)
        # Uploads that yield no text (e.g. unsupported or empty files) leave
        # nothing to index, so stop with a friendly message.
        if not chunks:
            st.warning("No readable text was found in the uploaded files.")
            return
        vector_db = create_vector_store(chunks)
        results = retrieve_docs(question, vector_db)
        context_text = "\n".join(doc.page_content for doc in results)
        try:
            answer = query_llm_groq(context_text, question, groq_api_key)
        except requests.RequestException as err:
            # Covers HTTP error statuses, connection failures and timeouts.
            st.error(f"Could not get an answer from the GROQ API: {err}")
            return

    st.markdown("### π Answer")
    parts = _split_answer(answer)
    if parts is None:
        # The reply did not follow the two-part layout; show it unchanged.
        st.markdown(answer)
        return

    explanation, story = parts
    st.markdown(f"**Explanation:**\n{explanation}")
    st.markdown(f"**Storytelling:**\n{story}")
    # Only synthesise audio when there is something to read aloud.
    if story.strip():
        st.markdown(generate_audio(story), unsafe_allow_html=True)


try:
    if question and uploaded_files and groq_api_key:
        _answer_question(question, uploaded_files, groq_api_key)
finally:
    # ----------------------------- CLEANUP --------------------------------------
    # Runs even when a step above raises, so the scratch directory never
    # outlives the script run. Best-effort: removal errors are ignored.
    shutil.rmtree(temp_dir, ignore_errors=True)