Spaces:

MiakOnline
/

learning_with_fun_app.py4

Sleeping

File size: 5,193 Bytes

e57b753
78733f8
9b60ea9
 
190d269
9b60ea9
190d269
 
 
9b60ea9
bf34897
b0dbdf5
190d269
4507d2c
35f363f
190d269
 
 
 
4507d2c
 
9468d43
 
4507d2c
 
b0dbdf5
190d269
9b60ea9
 
 
b0dbdf5
190d269
9b60ea9
 
 
 
b0dbdf5
190d269
 
 
9b60ea9
190d269
9b60ea9
 
190d269
9b60ea9
9468d43
9b60ea9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b0dbdf5
9b60ea9
 
190d269
 
9b60ea9
 
0abac89
9b60ea9
190d269
 
 
9b60ea9
 
 
 
190d269
 
9468d43
9b60ea9
190d269
 
 
 
9b60ea9
 
 
 
9468d43
9b60ea9
 
 
 
 
3cc40cc
9b60ea9
 
 
 
 
 
190d269
 
 
9b60ea9
 
 
 
190d269
 
9b60ea9
 
 
9468d43
 
 
 
 
 
3cc40cc
 
9b60ea9
190d269
b0dbdf5
9b60ea9
9468d43
 
7ec7528
 
 
 
 
190d269
 
9468d43
 
 
 
 
 
190d269
5ba5d9d
9468d43

import streamlit as st
from pypdf import PdfReader
from docx import Document
import tempfile
import requests
from gtts import gTTS

from PIL import Image

from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline

from transformers import pipeline


# Setup HuggingFace pipeline with distilgpt2 (CPU)
@st.cache_resource(show_spinner=False)
def _load_text_generation():
    """Build the distilgpt2 pipeline and its LangChain wrapper once.

    Streamlit re-executes this script on every widget interaction, so
    building the model at module level would reload it on each rerun.
    ``st.cache_resource`` keeps a single shared instance instead.
    ``show_spinner=False`` ensures this call emits no UI element before
    ``st.set_page_config`` runs further down.

    Returns:
        A ``(pipeline, llm)`` tuple: the raw transformers text-generation
        pipeline and the ``HuggingFacePipeline`` wrapper around it.
    """
    generator = pipeline(
        "text-generation",
        model="distilgpt2",
        device=-1,  # CPU only
    )
    return generator, HuggingFacePipeline(pipeline=generator)


# Same module-level names as before, so the rest of the script is unaffected.
text_gen_pipeline, llm = _load_text_generation()

# Page-level configuration and header
st.set_page_config(layout="wide", page_title="Learning with Fun")
st.title("📘 Learning with Fun - Kids QA App")
st.markdown("Ask questions from your syllabus! 📚")

# Choices offered in the sidebar. The answer-format labels are compared
# verbatim inside get_answer, so they must not be reworded.
_GRADE_CHOICES = ["Grade 5", "Grade 6"]
_SUBJECT_CHOICES = ["Science", "Math", "Computer", "Islamiyat"]
_ANSWER_FORMATS = ["🧠 Beginner Explanation", "📖 Storytelling"]

# Sidebar widgets (rendered in this order)
grade = st.sidebar.selectbox("Select Grade", _GRADE_CHOICES)
subject = st.sidebar.selectbox("Select Subject", _SUBJECT_CHOICES)
mode = st.sidebar.radio("Answer Format", _ANSWER_FORMATS)
voice_enabled = st.sidebar.checkbox("🔈 Enable Voice", value=True)

# Fetch syllabus file from Google Drive link
def fetch_from_gdrive(link: str) -> str | None:
    file_id = None
    if "id=" in link:
        file_id = link.split("id=")[1].split("&")[0]
    elif "/d/" in link:
        file_id = link.split("/d/")[1].split("/")[0]
    if not file_id:
        return None

    url = f"https://drive.google.com/uc?export=download&id={file_id}"
    response = requests.get(url)
    if response.status_code == 200:
        tmp_file = tempfile.NamedTemporaryFile(delete=False)
        tmp_file.write(response.content)
        tmp_file.close()
        return tmp_file.name
    return None

# Ask for the share link, then try to resolve it to a local file path.
# uploaded_file stays None until a download succeeds.
# NOTE(review): this runs on every Streamlit rerun, so the file is fetched
# again each time a widget changes — consider caching the download.
file_link = st.text_input("Paste Google Drive Link to Syllabus File (.pdf or .docx)")
uploaded_file = None

if file_link:
    filepath = fetch_from_gdrive(file_link)
    if not filepath:
        st.error("Invalid Google Drive link or download error.")
    else:
        uploaded_file = filepath

# Extract text content from uploaded file
def extract_text(file_path: str) -> str:
    """Return the text of a PDF or DOCX file, chosen by path extension.

    Args:
        file_path: Path ending in ``.pdf`` or ``.docx`` (case-sensitive).

    Returns:
        For PDFs, the text of every page that yields any, concatenated with
        no separator. For DOCX files, each paragraph followed by a newline.
        For any other extension, a Streamlit error is shown and an empty
        string is returned.
    """
    if file_path.endswith(".pdf"):
        page_texts = (page.extract_text() for page in PdfReader(file_path).pages)
        # Pages with no extractable text are skipped.
        return "".join(chunk for chunk in page_texts if chunk)

    if file_path.endswith(".docx"):
        paragraphs = Document(file_path).paragraphs
        return "".join(para.text + "\n" for para in paragraphs)

    st.error("Unsupported file format. Please upload a PDF or DOCX file.")
    return ""

# Create vector store for similarity search
def create_vectorstore(text: str) -> FAISS:
    """Split *text* into overlapping chunks and index them in FAISS.

    Args:
        text: The full syllabus text. Callers should pass non-empty text.

    Returns:
        A FAISS vector store built from the chunks, embedded with the
        default ``HuggingFaceEmbeddings`` model.
    """
    # CharacterTextSplitter splits on "\n\n" by default. extract_text joins
    # DOCX paragraphs with a single "\n", so with the default separator the
    # document would come back as one oversized chunk. Split on single
    # newlines so chunk_size is actually honoured.
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=500,
        chunk_overlap=50,
    )
    docs = splitter.create_documents([text])
    embeddings = HuggingFaceEmbeddings()
    return FAISS.from_documents(docs, embeddings)

# Prompt templates
# Both templates are written in Urdu and take two variables: {question}
# (the student's query) and {context} (syllabus excerpts retrieved for it).
# get_answer picks one based on the sidebar "Answer Format" choice.
# NOTE(review): the generation model configured in this file is distilgpt2;
# confirm it can follow Urdu instructions.

# Storytelling mode. Rough translation: "A student asked: ... / Syllabus
# information: ... / Please answer the child in Urdu as an interesting story."
story_prompt = PromptTemplate.from_template(
    "ایک طالب علم نے سوال کیا: {question}\n"
    "نصاب کی معلومات: {context}\n"
    "برائے مہربانی ایک دلچسپ کہانی کی صورت میں بچے کو اردو میں جواب دیں۔"
)

# Beginner-explanation mode. Rough translation: "Question: ... / Syllabus
# context: ... / Please explain to the child in Urdu in a simple way."
explain_prompt = PromptTemplate.from_template(
    "سوال: {question}\n"
    "نصاب کا سیاق و سباق: {context}\n"
    "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
)

# Generate speech audio from text
def generate_voice(text: str, lang: str = 'ur') -> str:
    """Synthesize *text* to an MP3 file with gTTS and return its path.

    Args:
        text: The text to speak.
        lang: gTTS language code; defaults to Urdu (``'ur'``).

    Returns:
        Path of a temp ``.mp3`` file. It is not deleted automatically, so
        the caller owns its cleanup.
    """
    tts = gTTS(text=text, lang=lang)
    # Reserve a unique path and close the handle before gTTS writes to it.
    # Leaving it open leaks a file descriptor, and on Windows blocks
    # tts.save from reopening the file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tts_file:
        audio_path = tts_file.name
    tts.save(audio_path)
    return audio_path

# Generate answer using vectorstore context and LLM
def get_answer(query: str, vectorstore: FAISS, mode: str) -> str:
    """Answer *query* from syllabus chunks retrieved out of *vectorstore*.

    Args:
        query: The student's question.
        vectorstore: Index to retrieve relevant syllabus chunks from.
        mode: Sidebar answer format. ``"📖 Storytelling"`` selects the story
            prompt; any other value selects the explanation prompt.

    Returns:
        Whatever the module-level ``llm`` produces for the filled-in prompt.
    """
    matches = vectorstore.as_retriever().get_relevant_documents(query)
    context = "\n".join(match.page_content for match in matches)

    template = story_prompt if mode == "📖 Storytelling" else explain_prompt
    return llm.invoke(template.format(question=query, context=context))

# Main app flow
# Once a syllabus file is available: extract its text, ask for a question,
# answer it from the indexed text, and optionally read the answer aloud.
if uploaded_file:
    raw_text = extract_text(uploaded_file)
    if not raw_text.strip():
        st.error("No text extracted from the file. Please check the file content.")
    else:
        st.success("📄 Syllabus loaded successfully!")
        query = st.text_input("❓ Ask your question (Urdu or English)")
        if query:
            with st.spinner("Thinking..."):
                vectorstore = create_vectorstore(raw_text)
                answer = get_answer(query, vectorstore, mode)
                st.markdown("### ✅ Answer:")
                st.write(answer)

                # gTTS rejects empty text, so skip speech when the model
                # returned nothing speakable rather than crash after the
                # answer has already been shown.
                if voice_enabled and str(answer).strip():
                    audio_file = generate_voice(answer)
                    with open(audio_file, "rb") as audio:
                        st.audio(audio.read(), format="audio/mp3")
else:
    st.info("Please paste a Google Drive link to your syllabus file (.pdf or .docx) above.")