File size: 5,193 Bytes
e57b753 78733f8 9b60ea9 190d269 9b60ea9 190d269 9b60ea9 bf34897 b0dbdf5 190d269 4507d2c 35f363f 190d269 4507d2c 9468d43 4507d2c b0dbdf5 190d269 9b60ea9 b0dbdf5 190d269 9b60ea9 b0dbdf5 190d269 9b60ea9 190d269 9b60ea9 190d269 9b60ea9 9468d43 9b60ea9 b0dbdf5 9b60ea9 190d269 9b60ea9 0abac89 9b60ea9 190d269 9b60ea9 190d269 9468d43 9b60ea9 190d269 9b60ea9 9468d43 9b60ea9 3cc40cc 9b60ea9 190d269 9b60ea9 190d269 9b60ea9 9468d43 3cc40cc 9b60ea9 190d269 b0dbdf5 9b60ea9 9468d43 7ec7528 190d269 9468d43 190d269 5ba5d9d 9468d43 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
import streamlit as st
from pypdf import PdfReader
from docx import Document
import tempfile
import requests
from gtts import gTTS
from PIL import Image
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from transformers import pipeline
# Language model: distilgpt2 text generation, wrapped for use through LangChain.
_PIPELINE_OPTIONS = {
    "task": "text-generation",
    "model": "distilgpt2",
    "device": -1,  # -1 keeps inference on the CPU
}
text_gen_pipeline = pipeline(**_PIPELINE_OPTIONS)
llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
# Page chrome: must run before any other Streamlit element is drawn.
st.set_page_config(
    page_title="Learning with Fun",
    layout="wide",
)
st.title("📘 Learning with Fun - Kids QA App")
st.markdown("Ask questions from your syllabus! 📚")

# Sidebar controls; their values are read by the main flow further down.
grade = st.sidebar.selectbox(
    label="Select Grade",
    options=["Grade 5", "Grade 6"],
)
subject = st.sidebar.selectbox(
    label="Select Subject",
    options=["Science", "Math", "Computer", "Islamiyat"],
)
mode = st.sidebar.radio(
    label="Answer Format",
    options=["🧠 Beginner Explanation", "📖 Storytelling"],
)
voice_enabled = st.sidebar.checkbox(label="🔈 Enable Voice", value=True)
# Fetch syllabus file from Google Drive link
def fetch_from_gdrive(link: str) -> str | None:
file_id = None
if "id=" in link:
file_id = link.split("id=")[1].split("&")[0]
elif "/d/" in link:
file_id = link.split("/d/")[1].split("/")[0]
if not file_id:
return None
url = f"https://drive.google.com/uc?export=download&id={file_id}"
response = requests.get(url)
if response.status_code == 200:
tmp_file = tempfile.NamedTemporaryFile(delete=False)
tmp_file.write(response.content)
tmp_file.close()
return tmp_file.name
return None
# Ask for the share link and try to download it; `uploaded_file` stays None
# until a download succeeds.
file_link = st.text_input("Paste Google Drive Link to Syllabus File (.pdf or .docx)")
uploaded_file = None
if file_link:
    downloaded_path = fetch_from_gdrive(file_link)
    uploaded_file = downloaded_path or None
    if uploaded_file is None:
        st.error("Invalid Google Drive link or download error.")
# Extract text content from uploaded file
def extract_text(file_path: str) -> str:
    """Return the plain text of a PDF or DOCX file.

    The format is chosen from the file extension, compared
    case-insensitively so that ``.PDF`` / ``.DOCX`` are accepted too.

    Args:
        file_path: Path to a ``.pdf`` or ``.docx`` file.

    Returns:
        For PDFs, the non-empty page texts joined by newlines, so that words
        at a page boundary are not fused together. For DOCX files, every
        paragraph followed by a newline. For any other extension an error is
        shown in the Streamlit UI and an empty string is returned.
    """
    lowered = file_path.lower()
    if lowered.endswith(".pdf"):
        reader = PdfReader(file_path)
        page_texts = (page.extract_text() for page in reader.pages)
        # Skip pages with no extractable text (falsy result).
        return "\n".join(page_text for page_text in page_texts if page_text)
    if lowered.endswith(".docx"):
        doc = Document(file_path)
        return "".join(f"{para.text}\n" for para in doc.paragraphs)
    st.error("Unsupported file format. Please upload a PDF or DOCX file.")
    return ""
# Create vector store for similarity search
def create_vectorstore(text: str) -> FAISS:
    """Split *text* into overlapping chunks and index them in a FAISS store.

    Args:
        text: The full syllabus text to index.

    Returns:
        A FAISS vector store built from the chunks, embedded with the
        default ``HuggingFaceEmbeddings`` model.
    """
    # Split on single newlines: the splitter's default separator is a blank
    # line ("\n\n"), which line- and paragraph-delimited text rarely
    # contains, so the 500-character limit would otherwise not be honoured.
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=500,
        chunk_overlap=50,
    )
    docs = splitter.create_documents([text])
    embeddings = HuggingFaceEmbeddings()
    return FAISS.from_documents(docs, embeddings)
# Prompt templates (Urdu). Both take {question} and {context}.
_STORY_TEMPLATE = (
    "ایک طالب علم نے سوال کیا: {question}\n"
    "نصاب کی معلومات: {context}\n"
    "برائے مہربانی ایک دلچسپ کہانی کی صورت میں بچے کو اردو میں جواب دیں۔"
)
_EXPLAIN_TEMPLATE = (
    "سوال: {question}\n"
    "نصاب کا سیاق و سباق: {context}\n"
    "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
)

# Storytelling mode: asks for the answer as a story for the child.
story_prompt = PromptTemplate.from_template(_STORY_TEMPLATE)
# Beginner mode: asks for a simple explanation for the child.
explain_prompt = PromptTemplate.from_template(_EXPLAIN_TEMPLATE)
# Generate speech audio from text
def generate_voice(text: str, lang: str = 'ur') -> str:
    """Synthesize *text* to speech with gTTS and save it as an MP3 file.

    Args:
        text: The text to speak.
        lang: Language code passed to gTTS; defaults to Urdu (``'ur'``).

    Returns:
        Path of the temporary ``.mp3`` file. The file is created with
        ``delete=False``, so removing it is left to the caller.
    """
    tts = gTTS(text=text, lang=lang)
    # Reserve a unique path, then close the handle before gTTS writes to it:
    # an open NamedTemporaryFile cannot be reopened by name on Windows, and
    # leaving it open leaks a file descriptor on every call.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tts_file:
        mp3_path = tts_file.name
    tts.save(mp3_path)
    return mp3_path
# Generate answer using vectorstore context and LLM
def get_answer(query: str, vectorstore: FAISS, mode: str) -> str:
    """Answer *query* from the passages retrieved out of *vectorstore*.

    Args:
        query: The question to answer.
        vectorstore: Store searched for passages relevant to the question.
        mode: ``"📖 Storytelling"`` selects the story prompt; any other
            value selects the explanation prompt.

    Returns:
        Whatever the module-level ``llm`` returns for the filled-in prompt.
    """
    matches = vectorstore.as_retriever().get_relevant_documents(query)
    context = "\n".join(match.page_content for match in matches)
    template = story_prompt if mode == "📖 Storytelling" else explain_prompt
    return llm.invoke(template.format(question=query, context=context))
# Main app flow: syllabus -> question -> answer -> optional spoken answer.
if not uploaded_file:
    st.info("Please paste a Google Drive link to your syllabus file (.pdf or .docx) above.")
else:
    raw_text = extract_text(uploaded_file)
    if raw_text.strip():
        st.success("📄 Syllabus loaded successfully!")
        query = st.text_input("❓ Ask your question (Urdu or English)")
        if query:
            # The spinner covers indexing and generation only.
            with st.spinner("Thinking..."):
                vectorstore = create_vectorstore(raw_text)
                answer = get_answer(query, vectorstore, mode)
            st.markdown("### ✅ Answer:")
            st.write(answer)
            if voice_enabled:
                audio_file = generate_voice(answer)
                with open(audio_file, "rb") as audio:
                    audio_bytes = audio.read()
                st.audio(audio_bytes, format="audio/mp3")
    else:
        st.error("No text extracted from the file. Please check the file content.")
|