|
|
import streamlit as st |
|
|
from pypdf import PdfReader |
|
|
from docx import Document |
|
|
import tempfile |
|
|
import requests |
|
|
from gtts import gTTS |
|
|
|
|
|
from PIL import Image |
|
|
|
|
|
from langchain.vectorstores import FAISS |
|
|
from langchain.embeddings import HuggingFaceEmbeddings |
|
|
from langchain.text_splitter import CharacterTextSplitter |
|
|
from langchain.prompts import PromptTemplate |
|
|
from langchain.llms import HuggingFacePipeline |
|
|
|
|
|
from transformers import pipeline |
|
|
|
|
|
|
|
|
|
|
|
# Build a Hugging Face text-generation pipeline around the "distilgpt2"
# checkpoint. Per the transformers pipeline documentation, device=-1 runs
# the model on CPU.
text_gen_pipeline = pipeline(task="text-generation", model="distilgpt2", device=-1)

# Wrap the pipeline so the rest of the script can call it through
# LangChain's LLM interface (see get_answer, which uses llm.invoke).
llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
|
|
|
|
|
|
|
|
# Page chrome: browser tab title, wide layout, heading and intro line.
st.set_page_config(page_title="Learning with Fun", layout="wide")
st.title("📘 Learning with Fun - Kids QA App")
st.markdown("Ask questions from your syllabus! 📚")

# Sidebar controls. `mode` picks which prompt template get_answer uses and
# `voice_enabled` decides whether the answer is also read aloud.
with st.sidebar:
    grade = st.selectbox("Select Grade", ["Grade 5", "Grade 6"])
    subject = st.selectbox(
        "Select Subject",
        ["Science", "Math", "Computer", "Islamiyat"],
    )
    mode = st.radio(
        "Answer Format",
        ["🧠 Beginner Explanation", "📖 Storytelling"],
    )
    voice_enabled = st.checkbox("🔈 Enable Voice", value=True)
|
|
|
|
|
|
|
|
def fetch_from_gdrive(link: str) -> str | None: |
|
|
file_id = None |
|
|
if "id=" in link: |
|
|
file_id = link.split("id=")[1].split("&")[0] |
|
|
elif "/d/" in link: |
|
|
file_id = link.split("/d/")[1].split("/")[0] |
|
|
if not file_id: |
|
|
return None |
|
|
|
|
|
url = f"https://drive.google.com/uc?export=download&id={file_id}" |
|
|
response = requests.get(url) |
|
|
if response.status_code == 200: |
|
|
tmp_file = tempfile.NamedTemporaryFile(delete=False) |
|
|
tmp_file.write(response.content) |
|
|
tmp_file.close() |
|
|
return tmp_file.name |
|
|
return None |
|
|
|
|
|
# Path of the downloaded syllabus file, or None until a link has been fetched.
uploaded_file = None
file_link = st.text_input("Paste Google Drive Link to Syllabus File (.pdf or .docx)")

if file_link:
    # Streamlit re-executes this script on every widget interaction. Remember
    # the download for the current link in session state so that asking a
    # question does not fetch the file again and leave another temporary
    # copy behind. Failed downloads are not remembered, so they are retried.
    cached_download = st.session_state.get("syllabus_download")
    if cached_download is not None and cached_download[0] == file_link:
        filepath = cached_download[1]
    else:
        filepath = fetch_from_gdrive(file_link)
        if filepath:
            st.session_state["syllabus_download"] = (file_link, filepath)

    if filepath:
        uploaded_file = filepath
    else:
        st.error("Invalid Google Drive link or download error.")
|
|
|
|
|
|
|
|
def extract_text(file_path: str) -> str:
    """Return the plain text of a PDF or DOCX file.

    The parser is chosen from the file extension, compared case-insensitively
    so that names such as ``NOTES.PDF`` are accepted.

    Args:
        file_path: Path to a ``.pdf`` or ``.docx`` file.

    Returns:
        The extracted text. PDF pages are joined with a newline so that words
        at page boundaries do not run together; each DOCX paragraph is
        followed by a newline. For any other extension an error is shown in
        the Streamlit UI and an empty string is returned.
    """
    lowered_path = file_path.lower()

    if lowered_path.endswith(".pdf"):
        reader = PdfReader(file_path)
        page_texts = (page.extract_text() for page in reader.pages)
        # Pages that yield no text (None or "") are skipped.
        return "\n".join(page_text for page_text in page_texts if page_text)

    if lowered_path.endswith(".docx"):
        doc = Document(file_path)
        return "".join(para.text + "\n" for para in doc.paragraphs)

    st.error("Unsupported file format. Please upload a PDF or DOCX file.")
    return ""
|
|
|
|
|
|
|
|
def create_vectorstore(text: str) -> FAISS:
    """Split *text* into chunks, embed them, and index them in FAISS.

    Args:
        text: The full syllabus text to index.

    Returns:
        A FAISS vector store built from the embedded chunks.
    """
    # CharacterTextSplitter only cuts at its separator, which defaults to a
    # blank line ("\n\n"). The text produced by extract_text() is mostly
    # delimited by single newlines, so with the default separator chunks can
    # end up far larger than chunk_size. Splitting on "\n" lets the splitter
    # honour the 500-character target.
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=500,
        chunk_overlap=50,
    )
    docs = splitter.create_documents([text])
    embeddings = HuggingFaceEmbeddings()
    return FAISS.from_documents(docs, embeddings)
|
|
|
|
|
|
|
|
# Urdu prompt templates. Both take the student's {question} and the retrieved
# syllabus {context}; get_answer() chooses between them based on the
# "Answer Format" selected in the sidebar.

# Storytelling mode: asks for the answer in the form of a story, in Urdu.
_STORY_TEMPLATE = (
    "ایک طالب علم نے سوال کیا: {question}\n"
    "نصاب کی معلومات: {context}\n"
    "برائے مہربانی ایک دلچسپ کہانی کی صورت میں بچے کو اردو میں جواب دیں۔"
)
story_prompt = PromptTemplate.from_template(_STORY_TEMPLATE)

# Beginner-explanation mode: asks for a simple explanation in Urdu.
_EXPLAIN_TEMPLATE = (
    "سوال: {question}\n"
    "نصاب کا سیاق و سباق: {context}\n"
    "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
)
explain_prompt = PromptTemplate.from_template(_EXPLAIN_TEMPLATE)
|
|
|
|
|
|
|
|
def generate_voice(text: str, lang: str = 'ur') -> str:
    """Synthesize *text* to speech with gTTS and return the MP3 file path.

    Args:
        text: The text to read aloud.
        lang: Language code passed to gTTS; defaults to ``'ur'``.

    Returns:
        Path of the generated ``.mp3`` temporary file. The file is not deleted
        automatically.
    """
    tts = gTTS(text=text, lang=lang)
    # Reserve a unique .mp3 path and close the handle before gTTS writes to
    # it: an open handle would otherwise leak, and on Windows it would prevent
    # the path from being opened a second time.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tts_file:
        audio_path = tts_file.name
    tts.save(audio_path)
    return audio_path
|
|
|
|
|
|
|
|
def get_answer(query: str, vectorstore: FAISS, mode: str) -> str:
    """Answer *query* using syllabus passages retrieved from *vectorstore*.

    Args:
        query: The student's question.
        vectorstore: Vector store to retrieve relevant passages from.
        mode: The selected answer format; "📖 Storytelling" selects the story
            prompt, any other value selects the explanation prompt.

    Returns:
        Whatever the language model returns for the filled-in prompt.
    """
    matches = vectorstore.as_retriever().get_relevant_documents(query)
    context = "\n".join(match.page_content for match in matches)

    template = story_prompt if mode == "📖 Storytelling" else explain_prompt
    return llm.invoke(template.format(question=query, context=context))
|
|
|
|
|
|
|
|
# Main question-answering flow: runs once a syllabus file has been downloaded.
if uploaded_file:
    raw_text = extract_text(uploaded_file)
    if not raw_text.strip():
        st.error("No text extracted from the file. Please check the file content.")
    else:
        st.success("📄 Syllabus loaded successfully!")
        query = st.text_input("❓ Ask your question (Urdu or English)")
        if query:
            with st.spinner("Thinking..."):
                # The script reruns for every question, but the syllabus text
                # rarely changes between questions. Keep the index in session
                # state and rebuild it only when the text differs, instead of
                # re-embedding the whole syllabus each time.
                indexed = st.session_state.get("syllabus_index")
                if indexed is None or indexed[0] != raw_text:
                    indexed = (raw_text, create_vectorstore(raw_text))
                    st.session_state["syllabus_index"] = indexed
                vectorstore = indexed[1]
                answer = get_answer(query, vectorstore, mode)

            st.markdown("### ✅ Answer:")
            st.write(answer)

            # gTTS rejects empty text, so skip speech for a blank answer.
            if voice_enabled and answer.strip():
                try:
                    audio_file = generate_voice(answer)
                except Exception as exc:
                    # UI boundary: the answer is already on screen, so a
                    # speech failure should only produce a warning rather
                    # than crash the page.
                    st.warning(f"Could not generate audio for this answer: {exc}")
                else:
                    with open(audio_file, "rb") as audio:
                        st.audio(audio.read(), format="audio/mp3")
else:
    st.info("Please paste a Google Drive link to your syllabus file (.pdf or .docx) above.")
|
|
|