learning_with_fun_app.py

Sleeping

App Files Files Community

MiakOnline committed on May 18, 2025

Commit

d2728e2

verified ·

1 Parent(s): 3dc53a6

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -70

app.py CHANGED Viewed

@@ -1,12 +1,12 @@
 import streamlit as st
 from pypdf import PdfReader
 from docx import Document
-import tempfile
-from gtts import gTTS
 from PIL import Image
-from langchain.vectorstores import FAISS
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.prompts import PromptTemplate
@@ -14,71 +14,84 @@ from langchain.llms import HuggingFacePipeline
 from transformers import pipeline
-# Setup HuggingFace pipeline with distilgpt2 (CPU)
-# text_gen_pipeline = pipeline(
-#    "text-generation",
-#    model="distilgpt2",
-#    device=-1  # CPU only
-#)
 text_gen_pipeline = pipeline(
     "text-generation",
     model="distilgpt2",
-    device=-1,  # CPU only
-    max_new_tokens=150,  # Increased from default
-    do_sample=True,
-    temperature=0.7,
-    top_k=50,
-    top_p=0.95
 )
 llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
-# Streamlit app config
-st.set_page_config(page_title="Learning with Fun", layout="wide")
-st.title("📘 Learning with Fun - Kids QA App")
-st.markdown("Ask questions from your syllabus! 📚")
-# Sidebar widgets
-grade = st.sidebar.selectbox("Select Grade", ["Grade 5", "Grade 6"])
-subject = st.sidebar.selectbox("Select Subject", ["Science", "Math", "Computer", "Islamiyat"])
-mode = st.sidebar.radio("Answer Format", ["🧠 Beginner Explanation", "📖 Storytelling"])
-voice_enabled = st.sidebar.checkbox("🔈 Enable Voice", value=True)
-# File uploader for syllabus
-uploaded_file = st.file_uploader(
-    "Upload your syllabus file (PDF, DOCX, JPEG, PNG, JPG)",
-    type=["pdf", "docx", "jpeg", "png", "jpg"]
-)
-# Extract text content from uploaded file directly
-def extract_text_from_uploaded(file) -> str:
     text = ""
-    if file is None:
-        return text
     if file.type == "application/pdf":
         try:
-            reader = PdfReader(file)
-            for page in reader.pages:
-                page_text = page.extract_text()
-                if page_text:
-                    text += page_text
         except Exception as e:
-            st.error(f"Error reading PDF file: {e}")
     elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
-        try:
-            doc = Document(file)
-            for para in doc.paragraphs:
-                text += para.text + "\n"
-        except Exception as e:
-            st.error(f"Error reading DOCX file: {e}")
     elif file.type in ["image/jpeg", "image/png"]:
-        st.warning("Image files currently are not supported for text extraction.")
     else:
         st.error("Unsupported file format.")
-    return text
-# Create vector store for similarity search
 def create_vectorstore(text: str) -> FAISS:
     splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
     docs = splitter.create_documents([text])
@@ -86,7 +99,7 @@ def create_vectorstore(text: str) -> FAISS:
     vectorstore = FAISS.from_documents(docs, embeddings)
     return vectorstore
-# Prompt templates
 story_prompt = PromptTemplate.from_template(
     "ایک طالب علم نے سوال کیا: {question}\n"
     "نصاب کی معلومات: {context}\n"
@@ -99,14 +112,14 @@ explain_prompt = PromptTemplate.from_template(
     "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
 )
-# Generate speech audio from text
 def generate_voice(text: str, lang='ur') -> str:
     """Synthesize ``text`` to speech with gTTS and return the MP3 file path.

     Args:
         text: The text to convert to speech.
         lang: Language code passed to gTTS (defaults to ``'ur'``).

     Returns:
         Path of a temporary ``.mp3`` file holding the audio. The file is
         created with ``delete=False``, so the caller is responsible for
         removing it when it is no longer needed.
     """
     # Build the gTTS object first so that a failure here does not leave an
     # orphaned temporary file behind.
     tts = gTTS(text=text, lang=lang)
     # Only the unique path is needed; close the handle right away so it is
     # not leaked and so gTTS can reopen the file by name on every platform.
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tts_file:
         audio_path = tts_file.name
     tts.save(audio_path)
     return audio_path
-# Generate answer using vectorstore context and LLM
 def get_answer(query: str, vectorstore: FAISS, mode: str) -> str:
     retriever = vectorstore.as_retriever()
     docs = retriever.get_relevant_documents(query)
@@ -117,27 +130,26 @@ def get_answer(query: str, vectorstore: FAISS, mode: str) -> str:
     else:
         prompt = explain_prompt.format(question=query, context=context)
-    answer = llm.invoke(prompt)
-    return answer
-# Main app flow
 if uploaded_file:
-    raw_text = extract_text_from_uploaded(uploaded_file)
-    if not raw_text.strip():
-        st.error("No text extracted from the file. Please check the file content.")
     else:
-        st.success("📄 Syllabus loaded successfully!")
-        query = st.text_input("❓ Ask your question (Urdu or English)")
         if query:
-            with st.spinner("Thinking..."):
                 vectorstore = create_vectorstore(raw_text)
                 answer = get_answer(query, vectorstore, mode)
                 st.markdown("### ✅ Answer:")
                 st.write(answer)
                 if voice_enabled:
-                    audio_file = generate_voice(answer)
-                    with open(audio_file, "rb") as audio:
-                        st.audio(audio.read(), format="audio/mp3")
 else:
-    st.info("Please upload a syllabus file above.")

 import streamlit as st
 from pypdf import PdfReader
 from docx import Document
 from PIL import Image
+from gtts import gTTS
+import tempfile
+import io
+from langchain_community.vectorstores import FAISS
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.prompts import PromptTemplate
 from transformers import pipeline
+# ------------------------ Setup HuggingFace LLM -----------------------
 text_gen_pipeline = pipeline(
     "text-generation",
     model="distilgpt2",
+    device=-1,  # CPU
+    max_new_tokens=150
 )
 llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
+# -------------------------- Streamlit UI Setup -------------------------
+st.set_page_config(page_title="Learning with Fun", layout="centered")
+st.markdown("""
+    <style>
+        body {
+            background: linear-gradient(to right, #f9f9f9, #e0f7fa);
+        }
+        .stApp {
+            font-family: 'Segoe UI', sans-serif;
+        }
+        .title {
+            text-align: center;
+            font-size: 36px;
+            font-weight: bold;
+            color: #006064;
+            margin-bottom: 10px;
+        }
+        .subtext {
+            text-align: center;
+            font-size: 18px;
+            color: #00796B;
+            margin-bottom: 30px;
+        }
+    </style>
+""", unsafe_allow_html=True)
+st.markdown('<div class="title">📘 Learning with Fun</div>', unsafe_allow_html=True)
+st.markdown('<div class="subtext">Ask questions from your syllabus in a fun way!</div>', unsafe_allow_html=True)
+# -------------------------- Sidebar Controls ----------------------------
+grade = st.sidebar.selectbox("🎓 Select Grade", ["Grade 5", "Grade 6"])
+subject = st.sidebar.selectbox("📘 Select Subject", ["Science", "Math", "Computer", "Islamiyat"])
+mode = st.sidebar.radio("🎯 Answer Format", ["🧠 Beginner Explanation", "📖 Storytelling"])
+voice_enabled = st.sidebar.checkbox("🔈 Enable Voice Output", value=True)
+# --------------------- File Upload and Text Extraction -------------------
+uploaded_file = st.file_uploader("📂 Upload Syllabus File (PDF, DOCX, JPEG, PNG)", type=["pdf", "docx", "jpeg", "jpg", "png"])
+def extract_text(file) -> str:
     text = ""
     if file.type == "application/pdf":
         try:
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
+                tmp.write(file.read())
+                tmp.seek(0)
+                reader = PdfReader(tmp.name)
+                for page in reader.pages:
+                    page_text = page.extract_text()
+                    if page_text:
+                        text += page_text
         except Exception as e:
+            st.error(f"Failed to read PDF: {e}")
     elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+        doc = Document(io.BytesIO(file.read()))
+        for para in doc.paragraphs:
+            text += para.text + "\n"
     elif file.type in ["image/jpeg", "image/png"]:
+        try:
+            import pytesseract
+            image = Image.open(file)
+            text = pytesseract.image_to_string(image)
+        except ImportError:
+            st.error("Please install pytesseract for image to text conversion.")
     else:
         st.error("Unsupported file format.")
+    return text.strip()
+# -------------------- Create Vector Store -------------------------------
 def create_vectorstore(text: str) -> FAISS:
     splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
     docs = splitter.create_documents([text])
     vectorstore = FAISS.from_documents(docs, embeddings)
     return vectorstore
+# ------------------------ Prompt Templates ------------------------------
 story_prompt = PromptTemplate.from_template(
     "ایک طالب علم نے سوال کیا: {question}\n"
     "نصاب کی معلومات: {context}\n"
     "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
 )
+# -------------------------- TTS Generator -------------------------------
 def generate_voice(text: str, lang='ur') -> str:
     """Synthesize ``text`` to speech with gTTS and return the MP3 file path.

     Args:
         text: The text to convert to speech.
         lang: Language code passed to gTTS (defaults to ``'ur'``).

     Returns:
         Path of a temporary ``.mp3`` file holding the audio. The file is
         created with ``delete=False``, so the caller is responsible for
         removing it when it is no longer needed.
     """
     # Build the gTTS object first so that a failure here does not leave an
     # orphaned temporary file behind.
     tts = gTTS(text=text, lang=lang)
     # Only the unique path is needed; close the handle right away so it is
     # not leaked and so gTTS can reopen the file by name on every platform.
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tts_file:
         audio_path = tts_file.name
     tts.save(audio_path)
     return audio_path
+# -------------------------- Answer Generator ----------------------------
 def get_answer(query: str, vectorstore: FAISS, mode: str) -> str:
     retriever = vectorstore.as_retriever()
     docs = retriever.get_relevant_documents(query)
     else:
         prompt = explain_prompt.format(question=query, context=context)
+    result = llm.invoke(prompt)
+    return result.strip()
+# ----------------------------- Main Logic -------------------------------
 if uploaded_file:
+    raw_text = extract_text(uploaded_file)
+    if not raw_text:
+        st.error("No text extracted from file.")
     else:
+        st.success("✅ Syllabus loaded successfully!")
+        query = st.text_input("💬 Ask a question (Urdu or English):")
         if query:
+            with st.spinner("🤔 Thinking..."):
                 vectorstore = create_vectorstore(raw_text)
                 answer = get_answer(query, vectorstore, mode)
                 st.markdown("### ✅ Answer:")
                 st.write(answer)
                 if voice_enabled:
+                    audio_path = generate_voice(answer)
+                    st.audio(audio_path, format="audio/mp3")
 else:
+    st.info("Please upload your syllabus file to begin.")