Spaces:

MiakOnline
/

learning_with_fun_app.py5

Sleeping

App Files Files Community

learning_with_fun_app.py5 / app.py

MiakOnline

Update app.py

d2728e2 verified 9 months ago

raw

history blame contribute delete

5.99 kB

	import streamlit as st
	from pypdf import PdfReader
	from docx import Document
	from PIL import Image
	from gtts import gTTS
	import tempfile
	import io

	from langchain_community.vectorstores import FAISS
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.text_splitter import CharacterTextSplitter
	from langchain.prompts import PromptTemplate
	from langchain.llms import HuggingFacePipeline

	from transformers import pipeline

	# ------------------------ Setup HuggingFace LLM -----------------------
	# Settings for the local generator: the distilgpt2 checkpoint, run on CPU
	# (device=-1), producing at most 150 new tokens per call.
	_GENERATION_SETTINGS = {
	    "model": "distilgpt2",
	    "device": -1,
	    "max_new_tokens": 150,
	}
	text_gen_pipeline = pipeline("text-generation", **_GENERATION_SETTINGS)

	# LangChain wrapper around the pipeline; get_answer() calls llm.invoke().
	llm = HuggingFacePipeline(pipeline=text_gen_pipeline)

	# -------------------------- Streamlit UI Setup -------------------------
	st.set_page_config(page_title="Learning with Fun", layout="centered")

	# Stylesheet injected as raw HTML: page background gradient, app font, and
	# the .title / .subtext classes used by the banner below.
	_PAGE_CSS = """
	<style>
	body {
	background: linear-gradient(to right, #f9f9f9, #e0f7fa);
	}
	.stApp {
	font-family: 'Segoe UI', sans-serif;
	}
	.title {
	text-align: center;
	font-size: 36px;
	font-weight: bold;
	color: #006064;
	margin-bottom: 10px;
	}
	.subtext {
	text-align: center;
	font-size: 18px;
	color: #00796B;
	margin-bottom: 30px;
	}
	</style>
	"""
	st.markdown(_PAGE_CSS, unsafe_allow_html=True)

	# Banner: app title followed by the tagline, each rendered as its own block.
	for _banner_html in (
	    '<div class="title">📘 Learning with Fun</div>',
	    '<div class="subtext">Ask questions from your syllabus in a fun way!</div>',
	):
	    st.markdown(_banner_html, unsafe_allow_html=True)

	# -------------------------- Sidebar Controls ----------------------------
	_GRADE_CHOICES = ["Grade 5", "Grade 6"]
	_SUBJECT_CHOICES = ["Science", "Math", "Computer", "Islamiyat"]
	# get_answer() compares `mode` against the "📖 Storytelling" label verbatim.
	_FORMAT_CHOICES = ["🧠 Beginner Explanation", "📖 Storytelling"]

	_sidebar = st.sidebar
	grade = _sidebar.selectbox("🎓 Select Grade", _GRADE_CHOICES)
	subject = _sidebar.selectbox("📘 Select Subject", _SUBJECT_CHOICES)
	mode = _sidebar.radio("🎯 Answer Format", _FORMAT_CHOICES)
	voice_enabled = _sidebar.checkbox("🔈 Enable Voice Output", value=True)

	# --------------------- File Upload and Text Extraction -------------------
	# Syllabus upload widget; the accepted extensions mirror the formats that
	# extract_text() knows how to read.
	uploaded_file = st.file_uploader(
	    "📂 Upload Syllabus File (PDF, DOCX, JPEG, PNG)",
	    type=["pdf", "docx", "jpeg", "jpg", "png"],
	)

	def extract_text(file) -> str:
	    """Return the plain text contained in an uploaded syllabus file.

	    The reader is chosen from ``file.type`` (a MIME string):

	    * ``application/pdf``: text of every page, concatenated.
	    * the Word ``.docx`` MIME type: every paragraph followed by a newline.
	    * ``image/jpeg`` / ``image/png``: OCR via ``pytesseract``.

	    Any failure is reported to the user with ``st.error`` instead of being
	    raised, so callers only need to check for an empty result.

	    Args:
	        file: Object exposing ``.type`` and ``.read()``; at the call site in
	            this script it is the value returned by ``st.file_uploader``.

	    Returns:
	        The extracted text with surrounding whitespace stripped, or ``""``
	        when nothing could be extracted or the format is unsupported.
	    """
	    mime_type = file.type
	    text = ""

	    if mime_type == "application/pdf":
	        page_texts = []
	        try:
	            # Parse straight from memory: the previous temp file was created
	            # with delete=False and never removed, leaking a file per upload.
	            reader = PdfReader(io.BytesIO(file.read()))
	            for page in reader.pages:
	                page_text = page.extract_text()
	                if page_text:
	                    page_texts.append(page_text)
	        except Exception as e:
	            st.error(f"Failed to read PDF: {e}")
	        # Joined outside the try so pages read before a failure are kept.
	        text = "".join(page_texts)
	    elif mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
	        try:
	            doc = Document(io.BytesIO(file.read()))
	            text = "".join(f"{para.text}\n" for para in doc.paragraphs)
	        except Exception as e:
	            # Same reporting style as the PDF branch instead of a crash.
	            st.error(f"Failed to read DOCX: {e}")
	    elif mime_type in ["image/jpeg", "image/png"]:
	        try:
	            # Imported lazily so the app still starts without pytesseract.
	            import pytesseract
	            image = Image.open(file)
	            text = pytesseract.image_to_string(image)
	        except ImportError:
	            st.error("Please install pytesseract for image to text conversion.")
	        except Exception as e:
	            # e.g. the Tesseract binary is missing or the image is unreadable.
	            st.error(f"Failed to read image: {e}")
	    else:
	        st.error("Unsupported file format.")

	    return text.strip()

	# -------------------- Create Vector Store -------------------------------
	def create_vectorstore(text: str) -> FAISS:
	    """Index *text* in a FAISS vector store for similarity retrieval.

	    The text is cut with a ``CharacterTextSplitter`` configured for
	    500-character chunks and a 50-character overlap, and the resulting
	    documents are embedded with a ``HuggingFaceEmbeddings`` instance built
	    with no arguments (library defaults).

	    Args:
	        text: The full syllabus text to index.

	    Returns:
	        A FAISS store built from the chunked documents.
	    """
	    chunker = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
	    return FAISS.from_documents(
	        chunker.create_documents([text]),
	        HuggingFaceEmbeddings(),
	    )

	# ------------------------ Prompt Templates ------------------------------
	# Both templates are written in Urdu and take {question} and {context}.

	# Storytelling format. Roughly: "A student asked: ... / Syllabus
	# information: ... / Please answer the child in Urdu as an interesting story."
	_STORY_TEMPLATE = (
	    "ایک طالب علم نے سوال کیا: {question}\n"
	    "نصاب کی معلومات: {context}\n"
	    "برائے مہربانی ایک دلچسپ کہانی کی صورت میں بچے کو اردو میں جواب دیں۔"
	)
	story_prompt = PromptTemplate.from_template(_STORY_TEMPLATE)

	# Beginner-explanation format. Roughly: "Question: ... / Syllabus context:
	# ... / Please explain it to the child in simple Urdu."
	_EXPLAIN_TEMPLATE = (
	    "سوال: {question}\n"
	    "نصاب کا سیاق و سباق: {context}\n"
	    "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
	)
	explain_prompt = PromptTemplate.from_template(_EXPLAIN_TEMPLATE)

	# -------------------------- TTS Generator -------------------------------
	def generate_voice(text: str, lang='ur') -> str:
	    """Synthesize *text* to speech with gTTS and return the MP3 file path.

	    Args:
	        text: The text to speak.
	        lang: Language code passed to gTTS; defaults to ``'ur'``.

	    Returns:
	        Path of a newly created ``.mp3`` temporary file. The file is created
	        with ``delete=False``, so removing it is the caller's responsibility.

	    Raises:
	        Whatever ``gTTS(...)`` or ``gTTS.save`` raises; on a failed save the
	        placeholder file is removed before the exception propagates.
	    """
	    import os

	    tts = gTTS(text=text, lang=lang)

	    # Reserve a unique path, then close the handle before gTTS writes to it:
	    # the handle was previously left open (a descriptor leak per call), and
	    # an open temp file cannot be reopened by name on every platform.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tts_file:
	        audio_path = tts_file.name

	    try:
	        tts.save(audio_path)
	    except Exception:
	        # Do not leave an empty placeholder behind when synthesis fails.
	        os.unlink(audio_path)
	        raise
	    return audio_path

	# -------------------------- Answer Generator ----------------------------
	def get_answer(query: str, vectorstore: FAISS, mode: str) -> str:
	    """Answer *query* using syllabus passages retrieved from *vectorstore*.

	    Args:
	        query: The learner's question.
	        vectorstore: Store used to retrieve the passages relevant to the query.
	        mode: Answer format label; ``"📖 Storytelling"`` selects
	            ``story_prompt``, any other value selects ``explain_prompt``.

	    Returns:
	        The model output with surrounding whitespace stripped.
	    """
	    matches = vectorstore.as_retriever().get_relevant_documents(query)
	    context = "\n".join(match.page_content for match in matches)

	    template = story_prompt if mode == "📖 Storytelling" else explain_prompt
	    filled_prompt = template.format(question=query, context=context)

	    return llm.invoke(filled_prompt).strip()

	# ----------------------------- Main Logic -------------------------------
	# Page flow: with a file uploaded, extract its text, ask for a question,
	# answer it from the syllabus, and optionally speak the answer; with no
	# file uploaded, show a hint instead.
	# NOTE(review): block indentation was lost in this copy of the file, so the
	# exact nesting (in particular which statements sit inside the st.spinner
	# context) cannot be confirmed from here. Restore it from the original.
	if uploaded_file:
	# extract_text() returns a stripped string, so "" means nothing usable was read.
	raw_text = extract_text(uploaded_file)
	if not raw_text:
	st.error("No text extracted from file.")
	else:
	st.success("✅ Syllabus loaded successfully!")
	query = st.text_input("💬 Ask a question (Urdu or English):")
	if query:
	with st.spinner("🤔 Thinking..."):
	# The index is built from raw_text here, after a question has been entered.
	# NOTE(review): presumably rebuilt on every rerun; confirm and consider caching.
	vectorstore = create_vectorstore(raw_text)
	# `mode` is the sidebar "Answer Format" selection.
	answer = get_answer(query, vectorstore, mode)
	st.markdown("### ✅ Answer:")
	st.write(answer)

	if voice_enabled:
	# generate_voice() is called with its default lang ('ur') and returns the path
	# of a temp file created with delete=False; nothing visible here removes it.
	audio_path = generate_voice(answer)
	st.audio(audio_path, format="audio/mp3")
	else:
	st.info("Please upload your syllabus file to begin.")