Spaces:
Sleeping
Sleeping
File size: 5,991 Bytes
e57b753 78733f8 9b60ea9 260a142 d2728e2 f82cc7b d2728e2 bf34897 b0dbdf5 190d269 4507d2c f82cc7b 190d269 d2728e2 f82cc7b d2728e2 f82cc7b 4507d2c b0dbdf5 d2728e2 9b60ea9 260a142 f82cc7b d2728e2 f82cc7b d2728e2 260a142 d2728e2 260a142 d2728e2 190d269 5590c40 d2728e2 9b60ea9 d2728e2 f82cc7b d2728e2 9b60ea9 260a142 9b60ea9 d2728e2 190d269 9b60ea9 d2728e2 190d269 9b60ea9 260a142 d2728e2 260a142 d2728e2 b0dbdf5 d2728e2 260a142 d2728e2 7ec7528 d2728e2 190d269 cf45297 9468d43 d2728e2 5ba5d9d d2728e2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
import streamlit as st
from pypdf import PdfReader
from docx import Document
from PIL import Image
from gtts import gTTS
import tempfile
import io
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from transformers import pipeline
# ------------------------ Setup HuggingFace LLM -----------------------
@st.cache_resource(show_spinner=False)
def _load_llm() -> HuggingFacePipeline:
    """Build the LangChain wrapper around a local distilgpt2 pipeline.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Caching the loader keeps one loaded model alive across
    those reruns instead of rebuilding the pipeline each time.
    ``show_spinner=False`` keeps the cache from rendering anything, since
    this call happens ahead of the later ``st.set_page_config`` call.

    Returns:
        A ``HuggingFacePipeline`` wrapping the text-generation pipeline.
    """
    generator = pipeline(
        "text-generation",
        model="distilgpt2",
        device=-1,  # CPU
        max_new_tokens=150,
    )
    return HuggingFacePipeline(pipeline=generator)


# Both names stay at module level: the rest of the script refers to `llm`,
# and `text_gen_pipeline` is kept for backward compatibility.
llm = _load_llm()
text_gen_pipeline = llm.pipeline
# -------------------------- Streamlit UI Setup -------------------------
# Browser-tab title and a centered content column.
st.set_page_config(page_title="Learning with Fun", layout="centered")

# Stylesheet: page gradient, app font, and the two banner classes
# (.title / .subtext) used by the header markup below.
_PAGE_STYLE = """
<style>
body {
background: linear-gradient(to right, #f9f9f9, #e0f7fa);
}
.stApp {
font-family: 'Segoe UI', sans-serif;
}
.title {
text-align: center;
font-size: 36px;
font-weight: bold;
color: #006064;
margin-bottom: 10px;
}
.subtext {
text-align: center;
font-size: 18px;
color: #00796B;
margin-bottom: 30px;
}
</style>
"""
_TITLE_HTML = '<div class="title">📘 Learning with Fun</div>'
_SUBTITLE_HTML = '<div class="subtext">Ask questions from your syllabus in a fun way!</div>'

# Emit the stylesheet first, then the title and subtitle banners, all as raw HTML.
for _snippet in (_PAGE_STYLE, _TITLE_HTML, _SUBTITLE_HTML):
    st.markdown(_snippet, unsafe_allow_html=True)
# -------------------------- Sidebar Controls ----------------------------
# Choices offered by each widget, named so they are easy to scan and edit.
_GRADE_CHOICES = ["Grade 5", "Grade 6"]
_SUBJECT_CHOICES = ["Science", "Math", "Computer", "Islamiyat"]
_ANSWER_FORMATS = ["🧠 Beginner Explanation", "📖 Storytelling"]
_UPLOAD_EXTENSIONS = ["pdf", "docx", "jpeg", "jpg", "png"]

_sidebar = st.sidebar
grade = _sidebar.selectbox("🎓 Select Grade", _GRADE_CHOICES)
subject = _sidebar.selectbox("📘 Select Subject", _SUBJECT_CHOICES)
# The selected label is later compared against "📖 Storytelling" to pick a prompt.
mode = _sidebar.radio("🎯 Answer Format", _ANSWER_FORMATS)
voice_enabled = _sidebar.checkbox("🔈 Enable Voice Output", value=True)
# --------------------- File Upload and Text Extraction -------------------
uploaded_file = st.file_uploader(
    "📂 Upload Syllabus File (PDF, DOCX, JPEG, PNG)",
    type=_UPLOAD_EXTENSIONS,
)
def extract_text(file) -> str:
    """Extract plain text from an uploaded PDF, DOCX, JPEG or PNG file.

    The format is chosen from ``file.type`` (the MIME type). Failures are
    reported to the user with ``st.error`` rather than raised, so the caller
    only has to check for an empty result.

    Args:
        file: Uploaded file object exposing ``.type`` and ``.read()``.

    Returns:
        The extracted text with surrounding whitespace stripped, or ``""``
        when nothing could be extracted.
    """
    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    text = ""
    if file.type == "application/pdf":
        # Parse straight from memory. This avoids leaving a delete=False temp
        # file on disk for every upload, and avoids reopening a still-open
        # temp file by name.
        page_texts = []
        try:
            reader = PdfReader(io.BytesIO(file.read()))
            for page in reader.pages:
                page_text = page.extract_text()
                if page_text:
                    page_texts.append(page_text)
        except Exception as e:
            st.error(f"Failed to read PDF: {e}")
        # Joined outside the try so pages read before a failure are kept.
        # The newline stops the last word of one page fusing with the first
        # word of the next.
        text = "\n".join(page_texts)
    elif file.type == docx_mime:
        try:
            doc = Document(io.BytesIO(file.read()))
            text = "\n".join(para.text for para in doc.paragraphs)
        except Exception as e:
            st.error(f"Failed to read DOCX: {e}")
    elif file.type in ("image/jpeg", "image/png"):
        try:
            # Optional dependency, imported lazily so PDF/DOCX work without it.
            import pytesseract
        except ImportError:
            st.error("Please install pytesseract for image to text conversion.")
        else:
            try:
                text = pytesseract.image_to_string(Image.open(file))
            except Exception as e:
                # Covers unreadable images and a missing tesseract binary.
                st.error(f"Failed to read image: {e}")
    else:
        st.error("Unsupported file format.")
    return text.strip()
# -------------------- Create Vector Store -------------------------------
@st.cache_resource(show_spinner=False)
def _load_embeddings() -> HuggingFaceEmbeddings:
    """Load the default HuggingFace embedding model once and reuse it."""
    return HuggingFaceEmbeddings()


def create_vectorstore(text: str) -> FAISS:
    """Split ``text`` into overlapping chunks and index them in FAISS.

    Args:
        text: The full syllabus text to index.

    Returns:
        A FAISS vector store holding the embedded chunks.

    Raises:
        ValueError: If ``text`` is empty or whitespace only. Without this
            check the failure surfaces later, inside the FAISS build, with
            an unrelated-looking error.
    """
    # Imported locally so the change is self-contained; it comes from the
    # same module the file already imports CharacterTextSplitter from.
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    if not text or not text.strip():
        raise ValueError("Cannot build a vector store from empty text.")

    # CharacterTextSplitter only cuts on blank lines ("\n\n"). Text from PDFs
    # or OCR often has none, which leaves a single chunk far larger than
    # chunk_size. The recursive splitter falls back to smaller separators so
    # the 500-character target is actually met.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = splitter.create_documents([text])
    return FAISS.from_documents(docs, _load_embeddings())
# ------------------------ Prompt Templates ------------------------------
# Urdu template: states the student's question and the syllabus information,
# then asks for an answer to the child in Urdu as an interesting story.
_STORY_TEMPLATE = (
    "ایک طالب علم نے سوال کیا: {question}\n"
    "نصاب کی معلومات: {context}\n"
    "برائے مہربانی ایک دلچسپ کہانی کی صورت میں بچے کو اردو میں جواب دیں۔"
)

# Urdu template: states the question and the syllabus context, then asks for
# a simple explanation to the child in Urdu.
_EXPLAIN_TEMPLATE = (
    "سوال: {question}\n"
    "نصاب کا سیاق و سباق: {context}\n"
    "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
)

# Both templates take the same two placeholders: {question} and {context}.
story_prompt = PromptTemplate.from_template(_STORY_TEMPLATE)
explain_prompt = PromptTemplate.from_template(_EXPLAIN_TEMPLATE)
# -------------------------- TTS Generator -------------------------------
def generate_voice(text: str, lang='ur') -> str:
    """Synthesize ``text`` to speech with gTTS and return the MP3 file path.

    The file is created with ``delete=False`` so it still exists when the
    caller plays it; removing it afterwards is the caller's responsibility.

    Args:
        text: The text to speak.
        lang: gTTS language code; defaults to Urdu (``'ur'``).

    Returns:
        Path of the generated ``.mp3`` temporary file.
    """
    import os

    tts = gTTS(text=text, lang=lang)
    # Reserve a unique path, then close our handle before gTTS writes to it.
    # The original left the handle open, which leaks a descriptor per call
    # and blocks the write on platforms that lock open files.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tts_file:
        audio_path = tts_file.name
    try:
        tts.save(audio_path)
    except Exception:
        # Do not leave an empty placeholder file behind when synthesis fails.
        os.unlink(audio_path)
        raise
    return audio_path
# -------------------------- Answer Generator ----------------------------
def get_answer(query: str, vectorstore: FAISS, mode: str) -> str:
    """Answer ``query`` using syllabus chunks retrieved from ``vectorstore``.

    Args:
        query: The student's question.
        vectorstore: FAISS index built from the syllabus text.
        mode: Answer-format label. ``"📖 Storytelling"`` selects the story
            prompt; any other value selects the explanation prompt.

    Returns:
        The model output with surrounding whitespace stripped.
    """
    retriever = vectorstore.as_retriever()
    # `invoke` is the same entry point already used for `llm` below.
    # `get_relevant_documents` is deprecated in current LangChain releases.
    docs = retriever.invoke(query)
    context = "\n".join(doc.page_content for doc in docs)
    template = story_prompt if mode == "📖 Storytelling" else explain_prompt
    prompt = template.format(question=query, context=context)
    return llm.invoke(prompt).strip()
# ----------------------------- Main Logic -------------------------------
if uploaded_file:
    # Streamlit reruns this whole script on every interaction. The extracted
    # text and the FAISS index are kept in session state, keyed by the
    # upload's name and size, so a new question does not re-parse and
    # re-embed the same file.
    cache = st.session_state
    file_key = (uploaded_file.name, uploaded_file.size)
    if cache.get("syllabus_key") != file_key or not cache.get("syllabus_text"):
        # Empty results are deliberately not reused, so extraction errors
        # are reported again on the next run.
        uploaded_file.seek(0)
        cache["syllabus_key"] = file_key
        cache["syllabus_text"] = extract_text(uploaded_file)
        cache.pop("syllabus_index", None)
    raw_text = cache["syllabus_text"]

    if not raw_text:
        st.error("No text extracted from file.")
    else:
        st.success("✅ Syllabus loaded successfully!")
        query = st.text_input("💬 Ask a question (Urdu or English):")
        if query:
            with st.spinner("🤔 Thinking..."):
                if "syllabus_index" not in cache:
                    cache["syllabus_index"] = create_vectorstore(raw_text)
                vectorstore = cache["syllabus_index"]
                answer = get_answer(query, vectorstore, mode)
            st.markdown("### ✅ Answer:")
            st.write(answer)
            # Skip synthesis for an empty answer: there is nothing to speak.
            if voice_enabled and answer:
                try:
                    audio_path = generate_voice(answer)
                except Exception as e:
                    # Voice is optional; a failure here must not take down
                    # the page after the answer has already been shown.
                    st.warning(f"Voice output is unavailable: {e}")
                else:
                    st.audio(audio_path, format="audio/mp3")
else:
    st.info("Please upload your syllabus file to begin.")
|