MiakOnline's picture
Update app.py
8c8ad75 verified
# learning_with_fun_app.py
import base64
import os
import re
import shutil
import tempfile

import requests
import streamlit as st
from gtts import gTTS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader, Docx2txtLoader, UnstructuredImageLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
# ----------------------------- UI SETUP --------------------------------------
# Page configuration and the title shown at the top of the app.
# The emoji in the user-facing strings below were mis-encoded (mojibake) and
# have been restored to the intended characters.
st.set_page_config(page_title="Learning with Fun", layout="wide")
st.title("📚 Learning with Fun - Educational Q&A for Kids")

# ----------------------------- USER INPUT -----------------------------------
# NOTE(review): `grade` and `subject` are collected here but are not referenced
# anywhere else in the visible code -- confirm whether they should feed the
# prompt or the retrieval step.
grade = st.selectbox("Select your Grade", ["Grade 5", "Grade 6"])
subject = st.selectbox("Select Subject", ["Science", "Math", "English"])
uploaded_files = st.file_uploader(
    "Upload textbook files (PDF, DOCX, JPEG)",
    type=["pdf", "docx", "jpg", "jpeg"],
    accept_multiple_files=True,
)
question = st.text_input("Ask your question in English or Urdu")
groq_api_key = st.text_input("🔐 Enter your GROQ API Key", type="password")

# ------------------------- SETUP TEMP FOLDER -------------------------------
# Scratch directory used for the uploaded files and the generated audio; it is
# removed again by the cleanup step at the end of the script.
temp_dir = tempfile.mkdtemp()
# ------------------------- UTILITY FUNCTIONS -------------------------------
def load_documents(uploaded_files):
    """Load uploaded files into LangChain ``Document`` objects.

    Each supported upload (PDF, DOCX, JPG/JPEG) is written into ``temp_dir``
    and parsed with the loader matching its extension. Files with any other
    extension are skipped before anything is written to disk.

    Args:
        uploaded_files: Iterable of uploaded file objects exposing ``name``
            and either ``getvalue()`` or ``read()``.

    Returns:
        A flat list containing the documents produced by every loader.
    """
    docs = []
    for file in uploaded_files:
        # Keep only the final path component so a crafted upload name such as
        # "../../x.pdf" cannot be written outside temp_dir.
        filename = os.path.basename(file.name.replace("\\", "/"))
        ext = filename.rsplit(".", 1)[-1].lower()
        if ext not in ("pdf", "docx", "jpg", "jpeg"):
            continue

        # getvalue() returns the whole buffer regardless of the current read
        # position, so an upload that was already consumed is not saved empty.
        data = file.getvalue() if hasattr(file, "getvalue") else file.read()
        path = os.path.join(temp_dir, filename)
        with open(path, "wb") as f:
            f.write(data)

        if ext == "pdf":
            loader = PyMuPDFLoader(path)
        elif ext == "docx":
            loader = Docx2txtLoader(path)
        else:  # "jpg" / "jpeg"
            loader = UnstructuredImageLoader(path)
        docs.extend(loader.load())
    return docs
def split_documents(documents):
    """Chunk the given documents with a recursive character splitter.

    The splitter is configured with ``chunk_size=500`` and
    ``chunk_overlap=50``; the chunks it produces are returned as-is.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    ).split_documents(documents)
def create_vector_store(chunks):
    """Build a FAISS vector store from document chunks.

    Embeddings are computed by ``HuggingFaceEmbeddings`` using the
    ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    store = FAISS.from_documents(chunks, embedding_model)
    return store
def retrieve_docs(query, vector_store, k=3):
    """Search the vector store for the chunks most similar to ``query``.

    Args:
        query: Text to search for.
        vector_store: Object exposing ``similarity_search(query, k=...)``.
        k: Number of results to request. Defaults to 3, the value that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        Whatever ``vector_store.similarity_search`` returns.
    """
    return vector_store.similarity_search(query, k=k)
def query_llm_groq(context, query, groq_api_key, model="llama3-8b-8192", timeout=60):
    """Ask the GROQ chat-completions endpoint to answer ``query`` from ``context``.

    Args:
        context: Retrieved text inserted into the prompt under "Context:".
        query: The user's question, inserted under "Question:".
        groq_api_key: API key sent as a Bearer token.
        model: Model identifier sent in the request body. Defaults to the
            value that was previously hard-coded.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block indefinitely.

    Returns:
        The ``content`` string of the first choice in the response.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status (via ``raise_for_status``).
        KeyError, IndexError: If the response JSON lacks the expected
            ``choices[0].message.content`` structure.
    """
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {groq_api_key}",
        "Content-Type": "application/json",
    }
    # The prompt text is kept flush-left so that no code indentation leaks
    # into the string that is sent to the model.
    prompt = f"""
Context:
{context}
Question:
{query}
Provide two outputs:
1. A simple, educational explanation in English + Urdu.
2. A creative storytelling version mixing English and Urdu.
"""
    data = {
        "model": model,
        "messages": [
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.7,
    }
    response = requests.post(url, headers=headers, json=data, timeout=timeout)
    response.raise_for_status()
    result = response.json()
    return result["choices"][0]["message"]["content"]
def generate_audio(text, lang='ur'):
    """Synthesize speech for ``text`` and return it as an HTML audio player.

    The MP3 produced by gTTS is saved as ``response.mp3`` inside ``temp_dir``,
    read back, and embedded as a base64 data URI in an autoplaying
    ``<audio>`` element.
    """
    speech = gTTS(text, lang=lang)
    mp3_path = os.path.join(temp_dir, "response.mp3")
    speech.save(mp3_path)

    with open(mp3_path, "rb") as mp3_file:
        encoded = base64.b64encode(mp3_file.read()).decode()

    return f'<audio autoplay controls><source src="data:audio/mp3;base64,{encoded}" type="audio/mp3"></audio>'
# ----------------------------- MAIN LOGIC ----------------------------------
# Runs only once a question, at least one upload, and an API key are present.
# The try/finally guarantees the scratch directory is removed even when
# loading, embedding, or the API call raises.
try:
    if question and uploaded_files and groq_api_key:
        answer = None
        with st.spinner("Processing your documents..."):
            documents = load_documents(uploaded_files)
            chunks = split_documents(documents)
            if not chunks:
                # Nothing to index: building a vector store from zero chunks
                # fails, so tell the user instead of crashing.
                st.warning("No readable text was found in the uploaded files.")
            else:
                vector_db = create_vector_store(chunks)
                results = retrieve_docs(question, vector_db)
                context_text = "\n".join(doc.page_content for doc in results)
                try:
                    answer = query_llm_groq(context_text, question, groq_api_key)
                except requests.RequestException as exc:
                    # Network/HTTP failures become a readable message rather
                    # than a raw traceback.
                    st.error(f"Could not get an answer from GROQ: {exc}")

        if answer is not None:
            st.markdown("### 📘 Answer")
            # Split once, at the first line that begins item "2." (optionally
            # preceded by markdown punctuation such as "**"). A plain
            # split("2.") also matched text like "2.5" and produced more than
            # two parts whenever "2." appeared again in the story.
            parts = re.split(r"(?m)^\W*2\.\s", answer, maxsplit=1)
            if len(parts) == 2 and parts[1].strip():
                st.markdown(f"**Explanation:**\n{parts[0]}")
                st.markdown(f"**Storytelling:**\n{parts[1]}")
                st.markdown(generate_audio(parts[1]), unsafe_allow_html=True)
            else:
                st.markdown(answer)
finally:
    # ----------------------------- CLEANUP ----------------------------------
    shutil.rmtree(temp_dir, ignore_errors=True)