File size: 5,991 Bytes
e57b753
78733f8
9b60ea9
260a142
d2728e2
 
 
f82cc7b
d2728e2
bf34897
b0dbdf5
190d269
4507d2c
f82cc7b
190d269
 
d2728e2
f82cc7b
 
 
d2728e2
 
f82cc7b
4507d2c
b0dbdf5
d2728e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b60ea9
260a142
f82cc7b
d2728e2
 
 
 
 
 
 
 
f82cc7b
d2728e2
260a142
d2728e2
 
 
260a142
d2728e2
 
 
 
 
 
190d269
5590c40
d2728e2
9b60ea9
d2728e2
f82cc7b
 
 
 
 
 
 
d2728e2
9b60ea9
 
 
 
 
260a142
9b60ea9
 
 
 
 
 
d2728e2
190d269
 
9b60ea9
 
 
 
d2728e2
190d269
9b60ea9
 
 
 
260a142
 
 
 
 
d2728e2
 
260a142
d2728e2
b0dbdf5
d2728e2
 
 
260a142
d2728e2
 
7ec7528
d2728e2
190d269
 
cf45297
9468d43
 
 
d2728e2
 
5ba5d9d
d2728e2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import streamlit as st
from pypdf import PdfReader
from docx import Document
from PIL import Image
from gtts import gTTS
import tempfile
import io

from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline

from transformers import pipeline

# ------------------------ Setup HuggingFace LLM -----------------------
@st.cache_resource(show_spinner=False)
def _load_text_gen_pipeline():
    """Build the Hugging Face text-generation pipeline once and reuse it.

    Streamlit re-executes this script from the top on every user
    interaction; caching the pipeline as a resource avoids re-creating it
    on each rerun. ``show_spinner=False`` keeps this call from rendering
    anything before ``st.set_page_config`` runs later in the script.
    """
    # NOTE(review): distilgpt2 is a small general-purpose model, while the
    # prompts in this file ask for Urdu answers -- confirm the model choice.
    return pipeline(
        "text-generation",
        model="distilgpt2",
        device=-1,  # CPU
        max_new_tokens=150,
    )


# Module-level names kept as before so the rest of the script can use them.
text_gen_pipeline = _load_text_gen_pipeline()
llm = HuggingFacePipeline(pipeline=text_gen_pipeline)

# -------------------------- Streamlit UI Setup -------------------------
# Page-level configuration: browser tab title and a centered layout.
st.set_page_config(layout="centered", page_title="Learning with Fun")

# Custom CSS injected into the page: a gradient on <body>, the app font, and
# the `.title` / `.subtext` classes used by the two banner lines below.
PAGE_STYLE = """
    <style>
        body {
            background: linear-gradient(to right, #f9f9f9, #e0f7fa);
        }
        .stApp {
            font-family: 'Segoe UI', sans-serif;
        }
        .title {
            text-align: center;
            font-size: 36px;
            font-weight: bold;
            color: #006064;
            margin-bottom: 10px;
        }
        .subtext {
            text-align: center;
            font-size: 18px;
            color: #00796B;
            margin-bottom: 30px;
        }
    </style>
"""
st.markdown(PAGE_STYLE, unsafe_allow_html=True)

# Heading and tagline, rendered in order as raw HTML so the CSS classes apply.
for banner_html in (
    '<div class="title">📘 Learning with Fun</div>',
    '<div class="subtext">Ask questions from your syllabus in a fun way!</div>',
):
    st.markdown(banner_html, unsafe_allow_html=True)

# -------------------------- Sidebar Controls ----------------------------
# Choices offered by the sidebar widgets. The answer-format labels are
# compared verbatim in get_answer(), so they must not be altered.
GRADE_CHOICES = ["Grade 5", "Grade 6"]
SUBJECT_CHOICES = ["Science", "Math", "Computer", "Islamiyat"]
ANSWER_FORMATS = ["🧠 Beginner Explanation", "📖 Storytelling"]

sidebar = st.sidebar
grade = sidebar.selectbox("🎓 Select Grade", GRADE_CHOICES)
subject = sidebar.selectbox("📘 Select Subject", SUBJECT_CHOICES)
mode = sidebar.radio("🎯 Answer Format", ANSWER_FORMATS)
# Voice output is switched on by default.
voice_enabled = sidebar.checkbox("🔈 Enable Voice Output", value=True)

# --------------------- File Upload and Text Extraction -------------------
# File extensions the uploader accepts; extract_text() dispatches on the
# uploaded file's MIME type afterwards.
ACCEPTED_EXTENSIONS = ["pdf", "docx", "jpeg", "jpg", "png"]
uploaded_file = st.file_uploader(
    "📂 Upload Syllabus File (PDF, DOCX, JPEG, PNG)",
    type=ACCEPTED_EXTENSIONS,
)

def extract_text(file) -> str:
    """Extract plain text from an uploaded PDF, DOCX, JPEG or PNG file.

    Args:
        file: A file-like object exposing ``read()`` and a ``type``
            attribute holding the MIME type (as the Streamlit uploader
            returns).

    Returns:
        The extracted text with surrounding whitespace stripped. An empty
        string is returned when the format is unsupported or extraction
        fails; in those cases an error is shown via ``st.error`` instead of
        raising.
    """
    # Rewind so a stream that was already consumed (for example by an
    # earlier run of this script) is not read as empty.
    rewind = getattr(file, "seek", None)
    if callable(rewind):
        rewind(0)
    payload = file.read()
    mime = file.type
    text = ""

    if mime == "application/pdf":
        try:
            # Parse from memory: no temporary file is left behind on disk.
            reader = PdfReader(io.BytesIO(payload))
            page_texts = (page.extract_text() for page in reader.pages)
            # Separate pages with a newline so words at page boundaries
            # do not get glued together.
            text = "\n".join(chunk for chunk in page_texts if chunk)
        except Exception as e:
            st.error(f"Failed to read PDF: {e}")
    elif mime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        try:
            doc = Document(io.BytesIO(payload))
            text = "\n".join(para.text for para in doc.paragraphs)
        except Exception as e:
            st.error(f"Failed to read DOCX: {e}")
    elif mime in ("image/jpeg", "image/png"):
        try:
            # Optional dependency, imported lazily so the app still works
            # for PDF/DOCX uploads when OCR support is not installed.
            import pytesseract
        except ImportError:
            st.error("Please install pytesseract for image to text conversion.")
        else:
            try:
                with Image.open(io.BytesIO(payload)) as image:
                    text = pytesseract.image_to_string(image)
            except Exception as e:
                # Covers unreadable images and OCR runtime failures.
                st.error(f"Failed to read image: {e}")
    else:
        st.error("Unsupported file format.")
    return text.strip()

# -------------------- Create Vector Store -------------------------------
def create_vectorstore(text: str) -> FAISS:
    """Index ``text`` in a FAISS vector store for similarity search.

    The text is split with ``CharacterTextSplitter`` (chunk size 500,
    overlap 50), and the resulting documents are embedded with a
    default-constructed ``HuggingFaceEmbeddings`` instance.

    Args:
        text: The full syllabus text to index.

    Returns:
        A FAISS vector store built from the chunked documents.
    """
    chunks = CharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    ).create_documents([text])
    return FAISS.from_documents(chunks, HuggingFaceEmbeddings())

# ------------------------ Prompt Templates ------------------------------
# Storytelling template (Urdu): states the student's question and the
# syllabus information, then asks for an answer in Urdu told to the child
# as an interesting story.
story_prompt = PromptTemplate.from_template(
    "\n".join(
        [
            "ایک طالب علم نے سوال کیا: {question}",
            "نصاب کی معلومات: {context}",
            "برائے مہربانی ایک دلچسپ کہانی کی صورت میں بچے کو اردو میں جواب دیں۔",
        ]
    )
)

# Beginner-explanation template (Urdu): states the question and the syllabus
# context, then asks for a simple explanation for the child in Urdu.
explain_prompt = PromptTemplate.from_template(
    "\n".join(
        [
            "سوال: {question}",
            "نصاب کا سیاق و سباق: {context}",
            "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔",
        ]
    )
)

# -------------------------- TTS Generator -------------------------------
def generate_voice(text: str, lang='ur') -> str:
    """Synthesize ``text`` to speech with gTTS and save it as an MP3 file.

    Args:
        text: The text to speak.
        lang: Language code passed to gTTS; defaults to ``'ur'``.

    Returns:
        Path of the generated ``.mp3`` temporary file. The file is created
        with ``delete=False``, so the caller owns it and is responsible for
        removing it when it is no longer needed.

    Raises:
        Whatever ``gTTS`` raises on construction or while saving; the
        placeholder file is removed before a save error propagates.
    """
    import os

    tts = gTTS(text=text, lang=lang)
    # Reserve a unique path, then close the handle right away: the handle is
    # not needed for writing, and saving to a still-open named temporary
    # file fails on platforms that lock open files.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tts_file:
        audio_path = tts_file.name
    try:
        tts.save(audio_path)
    except Exception:
        # Do not leave an empty placeholder file behind when saving fails.
        os.unlink(audio_path)
        raise
    return audio_path

# -------------------------- Answer Generator ----------------------------
def get_answer(query: str, vectorstore: FAISS, mode: str) -> str:
    """Answer ``query`` using syllabus passages retrieved from ``vectorstore``.

    Args:
        query: The user's question.
        vectorstore: FAISS store built from the syllabus text.
        mode: Answer format label. ``"📖 Storytelling"`` selects the story
            prompt; any other value selects the beginner-explanation prompt.

    Returns:
        The language model's output with surrounding whitespace stripped.
    """
    retriever = vectorstore.as_retriever()
    # `invoke` is the current retriever entry point (the same runnable
    # interface as `llm.invoke` below); `get_relevant_documents` is deprecated.
    docs = retriever.invoke(query)
    context = "\n".join(doc.page_content for doc in docs)

    template = story_prompt if mode == "📖 Storytelling" else explain_prompt
    prompt = template.format(question=query, context=context)

    result = llm.invoke(prompt)
    return result.strip()

# ----------------------------- Main Logic -------------------------------
if uploaded_file:
    # Streamlit re-runs this script on every interaction. Cache the extracted
    # text and the vector index per uploaded file in the session state so the
    # file is not re-parsed and re-embedded for every question.
    file_key = (uploaded_file.name, uploaded_file.size)
    syllabus = st.session_state.get("syllabus_cache")
    if not syllabus or syllabus["key"] != file_key:
        syllabus = {
            "key": file_key,
            "text": extract_text(uploaded_file),
            "vectorstore": None,
        }
        # Only remember successful extractions so a failed read is retried
        # on the next rerun instead of being cached as empty.
        if syllabus["text"]:
            st.session_state["syllabus_cache"] = syllabus

    raw_text = syllabus["text"]
    if not raw_text:
        st.error("No text extracted from file.")
    else:
        st.success("✅ Syllabus loaded successfully!")
        query = st.text_input("💬 Ask a question (Urdu or English):")
        if query:
            with st.spinner("🤔 Thinking..."):
                # Build the index lazily on the first question for this file.
                if syllabus["vectorstore"] is None:
                    syllabus["vectorstore"] = create_vectorstore(raw_text)
                answer = get_answer(query, syllabus["vectorstore"], mode)
                st.markdown("### ✅ Answer:")
                st.write(answer)

                # Skip speech for an empty answer, and never let a speech
                # synthesis failure take down the already-rendered answer.
                if voice_enabled and answer:
                    try:
                        audio_path = generate_voice(answer)
                    except Exception as e:
                        st.warning(f"Voice output unavailable: {e}")
                    else:
                        st.audio(audio_path, format="audio/mp3")
else:
    st.info("Please upload your syllabus file to begin.")