Spaces:

MiakOnline
/

learning_with_fun_app.py2

Sleeping

App Files Files Community

learning_with_fun_app.py2 / app.py

MiakOnline

Update app.py

35f363f verified 9 months ago

raw

history blame contribute delete

6.1 kB

	# learning_with_fun_app.py

	import os
	import tempfile
	import streamlit as st
	import requests
	from langchain_community.vectorstores import FAISS
	from langchain_community.document_loaders import PyMuPDFLoader, Docx2txtLoader, UnstructuredImageLoader
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_core.documents import Document
	from gtts import gTTS
	import base64
	import shutil

	# ----------------------------- UI SETUP --------------------------------------
	# Custom CSS: page background, widget label styling, and the flex layout
	# used by the title banner. Injected as raw HTML, hence unsafe_allow_html.
	_PAGE_CSS = """
	<style>
	.main {
	background-color: #f0f8ff;
	}
	.block-container {
	padding-top: 2rem;
	}
	.stSelectbox > label, .stTextInput > label {
	font-size: 18px;
	font-weight: bold;
	color: #2e7d32;
	}
	.stTextInput input {
	font-size: 16px;
	padding: 10px;
	border-radius: 10px;
	}
	.title-container {
	display: flex;
	align-items: center;
	gap: 20px;
	}
	.title-container img {
	height: 80px;
	}
	</style>
	"""

	# Title banner: icon on the left, heading and tagline on the right.
	_HEADER_HTML = """
	<div class="title-container">
	<img src="https://cdn-icons-png.flaticon.com/512/201/201623.png" alt="Kids Book">
	<div>
	<h1>🌈 Learning with Fun 🎓</h1>
	<h4>Helping Kids Learn Through Interactive Books, Questions & Stories!</h4>
	</div>
	</div>
	"""

	# Page configuration is issued before any other Streamlit call.
	st.set_page_config(layout="wide", page_title="Learning with Fun")
	st.markdown(_PAGE_CSS, unsafe_allow_html=True)
	st.markdown(_HEADER_HTML, unsafe_allow_html=True)

	# ----------------------------- USER INPUT -----------------------------------
	# Grade and subject pickers.
	grade = st.selectbox("Select your Grade", ["Grade 5", "Grade 6"])
	subject = st.selectbox("Select Subject", ["Science", "Math", "English"])

	# Source material the question will be answered from.
	uploaded_files = st.file_uploader(
	    "Upload textbook files (PDF, DOCX, JPEG)",
	    type=["pdf", "docx", "jpg", "jpeg"],
	    accept_multiple_files=True,
	)

	# Pre-fill the question box with the last submitted question, if any.
	if 'last_question' in st.session_state:
	    _question_default = st.session_state.last_question
	else:
	    _question_default = ""
	question = st.text_input(
	    "Ask your question in English or Urdu",
	    value=_question_default,
	)

	# Action buttons; each is True only on the rerun triggered by its click.
	submit_btn = st.button("💬 Submit Question")
	clear_btn = st.button("🧹 Clear")

	# ----------------------------- ENV VAR SETUP -----------------------------------
	# The Groq API key is read from the environment; an unset variable yields
	# an empty string, which triggers the warning below.
	groq_api_key = os.environ.get("GROQ_API_KEY", "")
	if groq_api_key == "":
	    st.warning(
	        "GROQ API key is not set in the environment. "
	        "Please configure it as a Hugging Face Secret with the name 'GROQ_API_KEY'."
	    )

	# ------------------------- SETUP TEMP FOLDER -------------------------------
	# Fresh scratch directory for this script run; it holds the uploaded files
	# and the generated audio.
	temp_dir = tempfile.mkdtemp()

	# ------------------------- UTILITY FUNCTIONS -------------------------------
	def load_documents(uploaded_files):
	    """Save supported uploads into ``temp_dir`` and load them as documents.

	    Args:
	        uploaded_files: Iterable of uploaded file objects exposing ``name``
	            and ``getvalue()`` (Streamlit's ``UploadedFile`` is a
	            ``BytesIO`` subclass and provides both).

	    Returns:
	        A flat list with the documents produced by the loader chosen for
	        each file. Files whose extension is not pdf/docx/jpg/jpeg are
	        skipped and never written to disk.
	    """
	    docs = []
	    for file in uploaded_files:
	        # The name is supplied by the client: keep only its final component
	        # so a name such as "../../x.pdf" cannot escape temp_dir.
	        filename = os.path.basename(file.name)
	        ext = filename.split(".")[-1].lower()

	        # Choose the loader first so unsupported files are rejected before
	        # anything is written to disk.
	        if ext == "pdf":
	            loader_cls = PyMuPDFLoader
	        elif ext == "docx":
	            loader_cls = Docx2txtLoader
	        elif ext in ("jpg", "jpeg"):
	            loader_cls = UnstructuredImageLoader
	        else:
	            continue

	        path = os.path.join(temp_dir, filename)
	        with open(path, "wb") as f:
	            # getvalue() returns the whole buffer regardless of the current
	            # stream position, unlike read(), which yields nothing once the
	            # stream has already been consumed.
	            f.write(file.getvalue())

	        docs.extend(loader_cls(path).load())
	    return docs

	def split_documents(documents):
	    """Split documents into chunks of size 500 with an overlap of 50.

	    Args:
	        documents: Documents to be chunked.

	    Returns:
	        The chunks produced by ``RecursiveCharacterTextSplitter``.
	    """
	    return RecursiveCharacterTextSplitter(
	        chunk_size=500,
	        chunk_overlap=50,
	    ).split_documents(documents)

	def create_vector_store(chunks):
	    """Build a FAISS vector store from the given chunks.

	    Args:
	        chunks: Document chunks to embed and index.

	    Returns:
	        The FAISS store created from ``chunks`` using the
	        ``sentence-transformers/all-MiniLM-L6-v2`` embedding model.
	    """
	    model_name = "sentence-transformers/all-MiniLM-L6-v2"
	    embedder = HuggingFaceEmbeddings(model_name=model_name)
	    store = FAISS.from_documents(chunks, embedder)
	    return store

	def retrieve_docs(query, vector_store):
	    """Return the result of a k=3 similarity search for ``query``.

	    Args:
	        query: Text to search for.
	        vector_store: Object exposing ``similarity_search(query, k=...)``.

	    Returns:
	        Whatever ``vector_store.similarity_search`` returns for ``k=3``.
	    """
	    top_matches = vector_store.similarity_search(query, k=3)
	    return top_matches

	def query_llm_groq(context, query, groq_api_key, timeout=60):
	    """Ask the Groq chat-completions endpoint to answer ``query``.

	    Args:
	        context: Text inserted under "Context:" in the prompt.
	        query: The user's question, inserted under "Question:".
	        groq_api_key: Bearer token sent in the Authorization header.
	        timeout: Seconds to wait for the HTTP request before giving up.
	            Without a timeout ``requests`` waits indefinitely, which would
	            hang the app on a stalled connection.

	    Returns:
	        The content of the first choice's message in the JSON response.

	    Raises:
	        requests.HTTPError: If the response status indicates an error.
	        requests.Timeout: If the request exceeds ``timeout``.
	    """
	    url = "https://api.groq.com/openai/v1/chat/completions"
	    headers = {
	        "Authorization": f"Bearer {groq_api_key}",
	        "Content-Type": "application/json"
	    }
	    prompt = f"""
	Context:
	{context}

	Question:
	{query}

	Provide two outputs:
	1. A simple, educational explanation in English + Urdu.
	2. A creative storytelling version mixing English and Urdu.
	"""
	    data = {
	        "model": "llama3-8b-8192",
	        "messages": [
	            {"role": "user", "content": prompt}
	        ],
	        "temperature": 0.7
	    }
	    response = requests.post(url, headers=headers, json=data, timeout=timeout)
	    # Turn 4xx/5xx responses into exceptions instead of parsing an error body.
	    response.raise_for_status()
	    result = response.json()
	    return result["choices"][0]["message"]["content"]

	def generate_audio(text, lang='ur'):
	    """Synthesize ``text`` with gTTS and return an inline HTML audio player.

	    The speech is saved as ``response.mp3`` inside ``temp_dir``, read back,
	    and embedded in the returned markup as a base64 data URI.

	    Args:
	        text: Text to be spoken.
	        lang: Language code passed to gTTS.

	    Returns:
	        An ``<audio controls>`` HTML string containing the encoded MP3.
	    """
	    speech = gTTS(text, lang=lang)
	    mp3_path = os.path.join(temp_dir, "response.mp3")
	    speech.save(mp3_path)

	    with open(mp3_path, "rb") as mp3_file:
	        encoded = base64.b64encode(mp3_file.read()).decode()

	    return f'<audio controls><source src="data:audio/mp3;base64,{encoded}" type="audio/mp3"></audio>'

	# ----------------------------- MAIN LOGIC ----------------------------------
	# Answer generation runs only when the submit button was clicked and a
	# question, at least one upload, and the API key are all present.
	if submit_btn and question and uploaded_files and groq_api_key:
	    with st.spinner("Processing your documents and generating answer..."):
	        documents = load_documents(uploaded_files)
	        chunks = split_documents(documents)

	        if not chunks:
	            # Building a FAISS index from an empty chunk list fails, so
	            # report the problem instead of crashing.
	            st.error("No readable text was found in the uploaded files.")
	        else:
	            vector_db = create_vector_store(chunks)
	            results = retrieve_docs(question, vector_db)
	            context_text = "\n".join(doc.page_content for doc in results)
	            try:
	                answer = query_llm_groq(context_text, question, groq_api_key)
	            except requests.RequestException as exc:
	                # Network/HTTP failures become a readable message rather
	                # than an unhandled traceback.
	                st.error(f"Could not get an answer from the Groq API: {exc}")
	            else:
	                st.session_state.answer = answer
	                st.session_state.last_question = question

	# Handle "Clear" before rendering so the stored answer disappears on the
	# same rerun as the click, not one interaction later.
	if clear_btn:
	    if 'answer' in st.session_state:
	        del st.session_state['answer']

	if 'answer' in st.session_state:
	    st.markdown("### 📘 Answer")
	    # Split at the FIRST "2." only: the prompt asks for a numbered second
	    # section, and later occurrences of "2." inside the story must not
	    # prevent the explanation/story split.
	    parts = st.session_state.answer.split("2.", 1)
	    if len(parts) == 2:
	        st.markdown(f"Explanation:\n{parts[0]}")
	        st.markdown(f"Storytelling:\n{parts[1]}")

	        if st.button("🔊 Play Storytelling Voice"):
	            st.markdown(generate_audio(parts[1]), unsafe_allow_html=True)
	    else:
	        # No "2." marker found: show the answer unsplit.
	        st.markdown(st.session_state.answer)

	# ----------------------------- CLEANUP --------------------------------------
	# Best-effort removal of this run's scratch directory. ignore_errors=True
	# covers both "already gone" and failures during deletion, so cleanup can
	# never crash a page that has otherwise rendered successfully.
	shutil.rmtree(temp_dir, ignore_errors=True)