Spaces:

MiakOnline
/

learning_with_fun_app.py4

Sleeping

App Files Files Community

learning_with_fun_app.py4 / app.py

MiakOnline

Update app.py

190d269 verified 9 months ago

raw

history blame contribute delete

5.19 kB

	import streamlit as st
	from pypdf import PdfReader
	from docx import Document
	import tempfile
	import requests
	from gtts import gTTS

	from PIL import Image

	from langchain.vectorstores import FAISS
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.text_splitter import CharacterTextSplitter
	from langchain.prompts import PromptTemplate
	from langchain.llms import HuggingFacePipeline

	from transformers import pipeline


	# Setup HuggingFace pipeline with distilgpt2 (CPU)
	@st.cache_resource(show_spinner=False)
	def _load_text_gen_pipeline():
	    """Build the distilgpt2 text-generation pipeline once per server process.

	    Streamlit re-executes this script on every widget interaction; caching
	    the pipeline as a resource avoids reloading the model on each rerun.
	    The spinner is disabled so that no UI element is rendered before
	    ``st.set_page_config`` is called further down.
	    """
	    return pipeline(
	        "text-generation",
	        model="distilgpt2",
	        device=-1,  # CPU only
	    )


	text_gen_pipeline = _load_text_gen_pipeline()
	# The LangChain wrapper is built on every rerun around the cached pipeline.
	llm = HuggingFacePipeline(pipeline=text_gen_pipeline)

	# Page setup: title, layout and the introductory line shown above the form.
	st.set_page_config(page_title="Learning with Fun", layout="wide")
	st.title("📘 Learning with Fun - Kids QA App")
	st.markdown("Ask questions from your syllabus! 📚")

	# Sidebar controls; their values are read later by the main app flow.
	with st.sidebar:
	    grade = st.selectbox("Select Grade", ["Grade 5", "Grade 6"])
	    subject = st.selectbox(
	        "Select Subject",
	        ["Science", "Math", "Computer", "Islamiyat"],
	    )
	    mode = st.radio(
	        "Answer Format",
	        ["🧠 Beginner Explanation", "📖 Storytelling"],
	    )
	    voice_enabled = st.checkbox("🔈 Enable Voice", value=True)

	# Fetch syllabus file from Google Drive link
	def fetch_from_gdrive(link: str) -> str \| None:
	file_id = None
	if "id=" in link:
	file_id = link.split("id=")[1].split("&")[0]
	elif "/d/" in link:
	file_id = link.split("/d/")[1].split("/")[0]
	if not file_id:
	return None

	url = f"https://drive.google.com/uc?export=download&id={file_id}"
	response = requests.get(url)
	if response.status_code == 200:
	tmp_file = tempfile.NamedTemporaryFile(delete=False)
	tmp_file.write(response.content)
	tmp_file.close()
	return tmp_file.name
	return None

	# Ask for a Drive link and, when one is given, try to download it.
	# `uploaded_file` ends up as the local temp path, or None on no link/failure.
	file_link = st.text_input("Paste Google Drive Link to Syllabus File (.pdf or .docx)")

	filepath = fetch_from_gdrive(file_link) if file_link else None
	uploaded_file = filepath or None
	if file_link and not filepath:
	    st.error("Invalid Google Drive link or download error.")

	# Extract text content from uploaded file
	def extract_text(file_path: str) -> str:
	    """Return the plain text of a PDF or DOCX file.

	    Args:
	        file_path: Path to the file; the format is chosen from its
	            extension, compared case-insensitively.

	    Returns:
	        For ``.pdf``: the non-empty page texts joined by newlines, so words
	        at page boundaries do not run together. For ``.docx``: every
	        paragraph followed by a newline. For any other extension an error
	        is shown through ``st.error`` and an empty string is returned.
	    """
	    lowered = file_path.lower()
	    if lowered.endswith(".pdf"):
	        reader = PdfReader(file_path)
	        page_texts = (page.extract_text() for page in reader.pages)
	        # Skip pages that yield no text.
	        return "\n".join(chunk for chunk in page_texts if chunk)
	    if lowered.endswith(".docx"):
	        doc = Document(file_path)
	        return "".join(para.text + "\n" for para in doc.paragraphs)

	    st.error("Unsupported file format. Please upload a PDF or DOCX file.")
	    return ""

	# Create vector store for similarity search
	def create_vectorstore(text: str) -> FAISS:
	    """Split ``text`` into chunks, embed them and index them in FAISS.

	    Args:
	        text: The full syllabus text.

	    Returns:
	        A FAISS vector store built from the chunked documents using
	        ``HuggingFaceEmbeddings`` with its default model.
	    """
	    # CharacterTextSplitter only cuts at its separator, which defaults to a
	    # blank line ("\n\n"). Extracted syllabus text is mostly single-newline
	    # delimited, so split on "\n" to let the 500-character limit take effect.
	    splitter = CharacterTextSplitter(
	        separator="\n",
	        chunk_size=500,
	        chunk_overlap=50,
	    )
	    docs = splitter.create_documents([text])
	    return FAISS.from_documents(docs, HuggingFaceEmbeddings())

	# Prompt templates (Urdu). Both take {question} and {context}.
	# Storytelling variant: asks for the answer as a story for the child.
	_STORY_TEMPLATE = (
	    "ایک طالب علم نے سوال کیا: {question}\n"
	    "نصاب کی معلومات: {context}\n"
	    "برائے مہربانی ایک دلچسپ کہانی کی صورت میں بچے کو اردو میں جواب دیں۔"
	)
	# Explanation variant: asks for a simple explanation in Urdu.
	_EXPLAIN_TEMPLATE = (
	    "سوال: {question}\n"
	    "نصاب کا سیاق و سباق: {context}\n"
	    "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
	)

	story_prompt = PromptTemplate.from_template(_STORY_TEMPLATE)

	explain_prompt = PromptTemplate.from_template(_EXPLAIN_TEMPLATE)

	# Generate speech audio from text
	def generate_voice(text: str, lang: str = 'ur') -> str:
	    """Synthesize ``text`` to an MP3 file with gTTS and return its path.

	    Args:
	        text: The text to speak.
	        lang: Language code passed to gTTS; defaults to ``'ur'``.

	    Returns:
	        Path of a temporary ``.mp3`` file. The file is created with
	        ``delete=False`` and is not removed by this function.
	    """
	    tts = gTTS(text=text, lang=lang)
	    # Reserve a unique path, then close the handle before gTTS writes to it:
	    # leaving it open leaks a descriptor and blocks the save on platforms
	    # that do not allow reopening an open temp file.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tts_file:
	        audio_path = tts_file.name
	    tts.save(audio_path)
	    return audio_path

	# Generate answer using vectorstore context and LLM
	def get_answer(query: str, vectorstore: FAISS, mode: str) -> str:
	    """Answer ``query`` with the LLM, grounded in retrieved syllabus chunks.

	    Args:
	        query: The user's question.
	        vectorstore: Store whose retriever supplies the context documents.
	        mode: Answer format; "📖 Storytelling" selects the story prompt,
	            anything else selects the explanation prompt.

	    Returns:
	        Whatever ``llm.invoke`` returns for the formatted prompt.
	    """
	    matches = vectorstore.as_retriever().get_relevant_documents(query)
	    context = "\n".join(match.page_content for match in matches)

	    template = story_prompt if mode == "📖 Storytelling" else explain_prompt
	    return llm.invoke(template.format(question=query, context=context))

	# Main app flow: without a downloaded file, just prompt for a link;
	# otherwise extract the text, take a question, answer it and optionally
	# play the answer as audio.
	# NOTE(review): block nesting is reconstructed from the statement order —
	# confirm the spinner is meant to cover only the two computation steps.
	if not uploaded_file:
	    st.info("Please paste a Google Drive link to your syllabus file (.pdf or .docx) above.")
	else:
	    raw_text = extract_text(uploaded_file)
	    if raw_text.strip():
	        st.success("📄 Syllabus loaded successfully!")
	        query = st.text_input("❓ Ask your question (Urdu or English)")
	        if query:
	            with st.spinner("Thinking..."):
	                vectorstore = create_vectorstore(raw_text)
	                answer = get_answer(query, vectorstore, mode)
	            st.markdown("### ✅ Answer:")
	            st.write(answer)

	            if voice_enabled:
	                audio_file = generate_voice(answer)
	                with open(audio_file, "rb") as audio:
	                    st.audio(audio.read(), format="audio/mp3")
	    else:
	        st.error("No text extracted from the file. Please check the file content.")