# MiakOnline's picture
# Update app.py
# c18d608 verified
import streamlit as st
# Set page config FIRST
# NOTE: this call deliberately sits between the imports so that it runs before
# every other Streamlit command in this file (st.markdown, st.title, ...).
st.set_page_config(page_title="Education with Fun", page_icon="🎓", layout="centered")
import fitz # PyMuPDF
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from gtts import gTTS
import tempfile
import requests
import os
# --- Custom UI Styling ---
# CSS that is injected into the page as raw HTML. It contains two rules:
#   * [data-testid="stHeader"]    - background alpha is 0.0, i.e. fully transparent.
#   * .st-emotion-cache-1v0mbdj   - pale, slightly translucent background with
#                                   rounded corners, padding and a green shadow.
# Despite its name, `page_bg_img` holds no image; it is only a <style> block.
# NOTE(review): `.st-emotion-cache-1v0mbdj` looks like an auto-generated class
# name and may differ between Streamlit versions - confirm it still matches.
page_bg_img = """
<style>
[data-testid="stHeader"] {
background-color: rgba(0, 128, 0, 0.0);
}
.st-emotion-cache-1v0mbdj {
background-color: rgba(240, 255, 240, 0.92);
border-radius: 1rem;
padding: 1rem;
box-shadow: 0 0 10px rgba(0, 100, 0, 0.3);
}
</style>
"""
# unsafe_allow_html=True is passed so the <style> tag is treated as HTML
# (presumably it would otherwise be shown as text - confirm in Streamlit docs).
st.markdown(page_bg_img, unsafe_allow_html=True)
# --- Load Model ---
@st.cache_resource
def load_model():
    """Build the sentence-embedding model used to index and query the syllabus.

    The function is decorated with ``st.cache_resource``, so Streamlit is
    responsible for reusing the returned object between calls.

    Returns:
        SentenceTransformer: the ``all-MiniLM-L6-v2`` model.
    """
    embedder = SentenceTransformer('all-MiniLM-L6-v2')
    return embedder
# --- Download PDF from Google Drive ---
def get_pdf_from_drive(drive_url):
    """Download a shared Google Drive file and return its raw bytes.

    Args:
        drive_url: Share link that contains ``/d/<file_id>``, for example
            ``https://drive.google.com/file/d/<file_id>/view?usp=sharing``.

    Returns:
        The response body as ``bytes`` when the server answers HTTP 200,
        otherwise ``None``. Every failure path also shows an ``st.error``
        message, so callers only need to check for ``None``.
    """
    failure_message = "❌ Failed to fetch syllabus PDF."

    # partition() never raises, so a link without "/d/" is reported to the
    # user instead of crashing the app with an IndexError.
    _, marker, tail = drive_url.partition('/d/')
    # The id ends at the next path separator or at the start of the query.
    file_id = tail.split('/', 1)[0].split('?', 1)[0]
    if not marker or not file_id:
        st.error(failure_message)
        return None

    try:
        # The timeout keeps the app from hanging on a stalled connection;
        # `params` lets requests build (and URL-encode) the query string.
        response = requests.get(
            "https://drive.google.com/uc",
            params={"export": "download", "id": file_id},
            timeout=30,
        )
    except requests.RequestException:
        # DNS failures, refused connections, timeouts, ...
        st.error(failure_message)
        return None

    if response.status_code != 200:
        st.error(failure_message)
        return None
    return response.content
# --- Extract Text from PDF ---
def extract_text_from_pdf(pdf_bytes):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_bytes: Contents of a PDF file, passed to PyMuPDF as an in-memory
            stream.

    Returns:
        str: The page texts in page order, concatenated without a separator.
    """
    # Opening the document as a context manager closes it even when a page
    # fails to parse; previously the document was never closed.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        # join() avoids rebuilding the whole string once per page.
        return "".join(page.get_text() for page in doc)
# --- Split Text into Chunks ---
def split_text(text, max_length=300):
    """Group the sentences of ``text`` into chunks of roughly ``max_length`` chars.

    Sentences are obtained by splitting on ``'.'`` (so decimals and
    abbreviations are split as well) and each one gets its period back.
    Sentences are appended to the current chunk while the chunk length plus
    the sentence length stays below ``max_length``; otherwise a new chunk is
    started. A single sentence longer than ``max_length`` is kept whole as an
    oversized chunk.

    Args:
        text: The text to split.
        max_length: Soft upper bound on the chunk length, in characters.

    Returns:
        list[str]: Non-empty, whitespace-stripped chunks in document order.
        Empty or whitespace-only ``text`` yields ``[]``, so callers should
        check for an empty result before embedding.
    """
    chunks = []
    current_chunk = ""
    for sentence in text.split('.'):
        # Blank fragments come from a trailing period or from runs like
        # "..."; keeping them would append stray periods ("end..").
        if not sentence.strip():
            continue
        if len(current_chunk) + len(sentence) < max_length:
            current_chunk += sentence + "."
        else:
            # When the very first sentence is already too long there is
            # nothing to flush yet; do not emit an empty chunk.
            if current_chunk:
                chunks.append(current_chunk.strip())
            current_chunk = sentence + "."
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks
# --- Create FAISS Index ---
def create_faiss_index(chunks, model):
    """Embed ``chunks`` and load the vectors into a flat L2 FAISS index.

    Args:
        chunks: Non-empty sequence of text passages.
        model: Object whose ``encode(chunks)`` returns one embedding row per
            chunk (a 2-D array).

    Returns:
        tuple: ``(index, chunks)`` where ``index`` is a ``faiss.IndexFlatL2``
        whose i-th vector is the embedding of ``chunks[i]`` and ``chunks`` is
        the object that was passed in.

    Raises:
        ValueError: If ``chunks`` is empty. Without this check the failure
            only surfaced later as an opaque error on ``embeddings.shape[1]``.
    """
    if len(chunks) == 0:
        raise ValueError("cannot build a FAISS index from an empty chunk list")
    # FAISS expects float32 input; asarray converts only when it has to,
    # instead of always copying the matrix.
    embeddings = np.asarray(model.encode(chunks), dtype=np.float32)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, chunks
# --- Search Top Matches ---
def search_index(query, model, index, chunks, k=3):
    """Return up to ``k`` chunks whose embeddings are nearest to ``query``.

    Args:
        query: The question text.
        model: Object whose ``encode([query])`` returns a one-row embedding
            matrix.
        index: Index exposing ``search(vectors, k)`` that returns
            ``(distances, indices)``; position i is expected to refer to
            ``chunks[i]``.
        chunks: The passages that were indexed, in index order.
        k: Maximum number of passages to return.

    Returns:
        list[str]: Matching chunks, best match first. The list is shorter
        than ``k`` when fewer chunks exist and empty when ``chunks`` is empty
        or ``k`` is not positive.
    """
    # Asking for more neighbours than there are vectors makes FAISS pad the
    # result with -1, which `chunks[-1]` would silently turn into a duplicate
    # of the last chunk.
    k = min(k, len(chunks))
    if k <= 0:
        return []
    query_vector = np.asarray(model.encode([query]), dtype=np.float32)
    _, indices = index.search(query_vector, k)
    # Still filter defensively in case the index holds fewer vectors than
    # `chunks` has entries.
    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
# --- Story Answer Generator ---
def generate_story_answer(passages, question):
    """Wrap the retrieved passages in a fixed story-style frame.

    Args:
        passages: Iterable of passage strings; they are joined with single
            spaces.
        question: The question, echoed after the closing label.

    Returns:
        str: The fixed introduction line, a blank line, the joined passages,
        three newlines, and the closing label followed by ``question``.
    """
    story_body = " ".join(passages)
    pieces = [
        "📖 چلو بچوں، ایک زبردست کہانی سنتے ہیں!\n",
        "\n",
        story_body,
        "\n",
        "\n\n🧐 سوال تھا: ",
        f"{question}",
    ]
    return "".join(pieces)
# --- Text to Speech ---
def text_to_speech(text):
    """Synthesize ``text`` with gTTS (``lang='ur'``) into a temporary MP3 file.

    Args:
        text: The text to speak.

    Returns:
        str: Path of the ``.mp3`` file. It is created with ``delete=False``,
        so it stays on disk until the caller removes it.

    Raises:
        Whatever ``gTTS.save`` raises; the temporary file is removed first.
    """
    tts = gTTS(text=text, lang='ur')
    # Only reserve a unique path here. The handle is closed before gTTS
    # writes to it, because a NamedTemporaryFile cannot be reopened by name
    # on Windows while it is still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        audio_path = fp.name
    try:
        tts.save(audio_path)
    except Exception:
        # Do not leave an empty temp file behind when synthesis fails.
        os.remove(audio_path)
        raise
    return audio_path
# --- App UI ---
# Page heading.
st.title("📚 Education with Fun")
st.subheader("Learn Science & More (Grades 5, 6, Engineering) Through Stories and Sound!")

# Choices offered by the two dropdowns.
_grade_options = ["Grade 5", "Grade 6", "Engineering"]
_subject_options = ["Primary Science", "Organizational Behavior"]
grade = st.selectbox("📘 Select Grade or Program", _grade_options)
subject = st.selectbox("📙 Select Subject", _subject_options)

# Only two distinct PDFs are referenced so far; every combination other than
# the two "Primary Science" grade entries reuses one of them as a placeholder.
_grade5_pdf_url = "https://drive.google.com/file/d/1Sx77ZsdFrwqK4Y8UhZVEAOOCvRPbjxyk/view?usp=sharing"
_grade6_pdf_url = "https://drive.google.com/file/d/1HKQAxvNBJMHsSCADkkUI7x6F137ldDo_/view?usp=sharing"

# Syllabus PDF share link for each (grade, subject) pair.
syllabus_links = {
    ("Grade 5", "Primary Science"): _grade5_pdf_url,
    ("Grade 6", "Primary Science"): _grade6_pdf_url,
    # Replace with the actual Organizational Behavior PDF later.
    ("Engineering", "Organizational Behavior"): _grade6_pdf_url,
    # Placeholders.
    ("Engineering", "Primary Science"): _grade6_pdf_url,
    ("Grade 5", "Organizational Behavior"): _grade5_pdf_url,
    ("Grade 6", "Organizational Behavior"): _grade6_pdf_url,
}

# Free-text question; the processing step below only runs when it is non-empty.
question = st.text_input("🧠 Ask a Question (in English or Urdu):")
# Process
# Answer pipeline: download the syllabus PDF, extract and chunk its text,
# retrieve the chunks closest to the question, wrap them in the story frame
# and read the result aloud. Runs only when a question was entered and a
# syllabus link exists for the selected (grade, subject) pair.
if question and (grade, subject) in syllabus_links:
    with st.spinner("📥 Downloading syllabus and generating answer..."):
        model = load_model()
        pdf_bytes = get_pdf_from_drive(syllabus_links[(grade, subject)])
        # get_pdf_from_drive has already shown an error when this is None.
        if pdf_bytes:
            full_text = extract_text_from_pdf(pdf_bytes)
            chunks = split_text(full_text)
            if not full_text.strip() or not chunks:
                # E.g. a PDF without a text layer: there is nothing to index,
                # so say so instead of answering from empty content.
                st.warning("⚠️ No readable text was found in the syllabus PDF.")
            else:
                index, chunk_list = create_faiss_index(chunks, model)
                relevant_chunks = search_index(question, model, index, chunk_list)
                final_answer = generate_story_answer(relevant_chunks, question)
                audio_file = text_to_speech(final_answer)
                # Load the MP3 into memory and delete the temporary file right
                # away; otherwise every question leaves one more file on disk.
                try:
                    with open(audio_file, "rb") as audio_fp:
                        audio_bytes = audio_fp.read()
                finally:
                    os.remove(audio_file)
                st.subheader("📖 Storytelling Answer")
                st.write(final_answer)
                st.subheader("🔊 Listen to the Answer")
                st.audio(audio_bytes, format='audio/mp3')
                st.success("✅ Done! You can ask another question.")