MiakOnline committed on
Commit
cf45297
·
verified ·
1 Parent(s): 5590c40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -33
app.py CHANGED
@@ -2,9 +2,8 @@ import streamlit as st
2
  from pypdf import PdfReader
3
  from docx import Document
4
  from PIL import Image
5
- import pytesseract
6
- from gtts import gTTS
7
  import tempfile
 
8
 
9
  from langchain.vectorstores import FAISS
10
  from langchain.embeddings import HuggingFaceEmbeddings
@@ -13,106 +12,106 @@ from langchain.prompts import PromptTemplate
13
  from langchain.llms import HuggingFacePipeline
14
  from transformers import pipeline
15
 
16
- # Setup HuggingFace pipeline
17
  text_gen_pipeline = pipeline(
18
  "text-generation",
19
  model="distilgpt2",
20
- device=-1 # CPU only
21
  )
22
  llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
23
 
24
- # Streamlit page config
25
  st.set_page_config(page_title="Learning with Fun", layout="wide")
26
- st.title("📘 Learning with Fun - Kids QA App")
27
  st.markdown("Ask questions from your syllabus! 📚")
28
 
29
- # Sidebar options
30
  grade = st.sidebar.selectbox("Select Grade", ["Grade 5", "Grade 6"])
31
  subject = st.sidebar.selectbox("Select Subject", ["Science", "Math", "Computer", "Islamiyat"])
32
  mode = st.sidebar.radio("Answer Format", ["🧠 Beginner Explanation", "📖 Storytelling"])
33
  voice_enabled = st.sidebar.checkbox("🔈 Enable Voice", value=True)
34
 
35
- # File upload widget
36
- uploaded_file = st.file_uploader("📁 Upload Syllabus File (.pdf, .docx, .jpg, .jpeg, .png)", type=["pdf", "docx", "jpg", "jpeg", "png"])
37
 
38
- # Extract text from file
39
- def extract_text_from_file(file) -> str:
40
  text = ""
41
  if file.name.endswith(".pdf"):
42
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
43
- tmp_file.write(file.read())
44
- reader = PdfReader(tmp_file.name)
45
  for page in reader.pages:
46
  page_text = page.extract_text()
47
  if page_text:
48
  text += page_text
49
  elif file.name.endswith(".docx"):
50
- with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp_file:
51
- tmp_file.write(file.read())
52
- doc = Document(tmp_file.name)
53
  for para in doc.paragraphs:
54
  text += para.text + "\n"
55
- elif file.name.lower().endswith((".jpg", ".jpeg", ".png")):
56
- img = Image.open(file)
57
- text = pytesseract.image_to_string(img, lang='eng+urd')
 
58
  else:
59
  st.error("Unsupported file format.")
60
  return text
61
 
62
- # Create vector DB
63
  def create_vectorstore(text: str) -> FAISS:
64
  splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
65
  docs = splitter.create_documents([text])
66
  embeddings = HuggingFaceEmbeddings()
67
  return FAISS.from_documents(docs, embeddings)
68
 
69
- # Prompt templates
70
  story_prompt = PromptTemplate.from_template(
71
  "ایک طالب علم نے سوال کیا: {question}\n"
72
  "نصاب کی معلومات: {context}\n"
73
  "برائے مہربانی ایک دلچسپ کہانی کی صورت میں بچے کو اردو میں جواب دیں۔"
74
  )
75
-
76
  explain_prompt = PromptTemplate.from_template(
77
  "سوال: {question}\n"
78
  "نصاب کا سیاق و سباق: {context}\n"
79
  "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
80
  )
81
 
82
- # Text-to-speech
83
  def generate_voice(text: str, lang='ur') -> str:
84
  tts = gTTS(text=text, lang=lang)
85
  tts_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
86
  tts.save(tts_file.name)
87
  return tts_file.name
88
 
89
- # Answer generation
90
  def get_answer(query: str, vectorstore: FAISS, mode: str) -> str:
91
  retriever = vectorstore.as_retriever()
92
  docs = retriever.get_relevant_documents(query)
93
  context = "\n".join([doc.page_content for doc in docs])
94
-
95
  prompt = story_prompt.format(question=query, context=context) if mode == "📖 Storytelling" else explain_prompt.format(question=query, context=context)
96
  return llm.invoke(prompt)
97
 
98
- # Main logic
99
  if uploaded_file:
100
- raw_text = extract_text_from_file(uploaded_file)
101
- if not raw_text.strip():
102
- st.error("⚠️ No text found in the uploaded file.")
103
- else:
104
  st.success("✅ Syllabus loaded successfully!")
105
  query = st.text_input("❓ Ask your question (Urdu or English)")
106
  if query:
107
  with st.spinner("Thinking..."):
108
  vectorstore = create_vectorstore(raw_text)
109
  answer = get_answer(query, vectorstore, mode)
110
- st.markdown("### 📘 Answer:")
111
  st.write(answer)
112
 
113
  if voice_enabled:
114
  audio_file = generate_voice(answer)
115
  with open(audio_file, "rb") as audio:
116
  st.audio(audio.read(), format="audio/mp3")
 
 
 
117
  else:
118
- st.info("📂 Please upload your syllabus file to begin.")
 
2
  from pypdf import PdfReader
3
  from docx import Document
4
  from PIL import Image
 
 
5
  import tempfile
6
+ from gtts import gTTS
7
 
8
  from langchain.vectorstores import FAISS
9
  from langchain.embeddings import HuggingFaceEmbeddings
 
12
  from langchain.llms import HuggingFacePipeline
13
  from transformers import pipeline
14
 
15
+ # Setup LLM pipeline
16
  text_gen_pipeline = pipeline(
17
  "text-generation",
18
  model="distilgpt2",
19
+ device=-1 # Use CPU
20
  )
21
  llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
22
 
23
+ # Streamlit App Config
24
  st.set_page_config(page_title="Learning with Fun", layout="wide")
25
+ st.title("🌈📘 Learning with Fun 🎓 Kids QA App")
26
  st.markdown("Ask questions from your syllabus! 📚")
27
 
28
+ # Sidebar Controls
29
  grade = st.sidebar.selectbox("Select Grade", ["Grade 5", "Grade 6"])
30
  subject = st.sidebar.selectbox("Select Subject", ["Science", "Math", "Computer", "Islamiyat"])
31
  mode = st.sidebar.radio("Answer Format", ["🧠 Beginner Explanation", "📖 Storytelling"])
32
  voice_enabled = st.sidebar.checkbox("🔈 Enable Voice", value=True)
33
 
34
+ # File Upload
35
+ uploaded_file = st.file_uploader("📂 Upload Syllabus File (PDF, DOCX, JPG, PNG)", type=["pdf", "docx", "jpg", "jpeg", "png"])
36
 
37
+ # Extract text from PDF/DOCX
38
+ def extract_text(file) -> str:
39
  text = ""
40
  if file.name.endswith(".pdf"):
41
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
42
+ tmp.write(file.read())
43
+ reader = PdfReader(tmp.name)
44
  for page in reader.pages:
45
  page_text = page.extract_text()
46
  if page_text:
47
  text += page_text
48
  elif file.name.endswith(".docx"):
49
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp:
50
+ tmp.write(file.read())
51
+ doc = Document(tmp.name)
52
  for para in doc.paragraphs:
53
  text += para.text + "\n"
54
+ elif file.name.endswith((".jpg", ".jpeg", ".png")):
55
+ st.warning("🖼️ Image uploaded. Text extraction from images is disabled. Showing image only.")
56
+ image = Image.open(file)
57
+ st.image(image, caption="Uploaded Image", use_column_width=True)
58
  else:
59
  st.error("Unsupported file format.")
60
  return text
61
 
62
+ # Vectorstore creation
63
  def create_vectorstore(text: str) -> FAISS:
64
  splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
65
  docs = splitter.create_documents([text])
66
  embeddings = HuggingFaceEmbeddings()
67
  return FAISS.from_documents(docs, embeddings)
68
 
69
+ # Urdu Prompts
70
  story_prompt = PromptTemplate.from_template(
71
  "ایک طالب علم نے سوال کیا: {question}\n"
72
  "نصاب کی معلومات: {context}\n"
73
  "برائے مہربانی ایک دلچسپ کہانی کی صورت میں بچے کو اردو میں جواب دیں۔"
74
  )
 
75
  explain_prompt = PromptTemplate.from_template(
76
  "سوال: {question}\n"
77
  "نصاب کا سیاق و سباق: {context}\n"
78
  "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
79
  )
80
 
81
+ # Voice generator
82
  def generate_voice(text: str, lang='ur') -> str:
83
  tts = gTTS(text=text, lang=lang)
84
  tts_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
85
  tts.save(tts_file.name)
86
  return tts_file.name
87
 
88
+ # Answer generator
89
  def get_answer(query: str, vectorstore: FAISS, mode: str) -> str:
90
  retriever = vectorstore.as_retriever()
91
  docs = retriever.get_relevant_documents(query)
92
  context = "\n".join([doc.page_content for doc in docs])
 
93
  prompt = story_prompt.format(question=query, context=context) if mode == "📖 Storytelling" else explain_prompt.format(question=query, context=context)
94
  return llm.invoke(prompt)
95
 
96
+ # Main App Logic
97
  if uploaded_file:
98
+ raw_text = extract_text(uploaded_file)
99
+ if raw_text.strip():
 
 
100
  st.success("✅ Syllabus loaded successfully!")
101
  query = st.text_input("❓ Ask your question (Urdu or English)")
102
  if query:
103
  with st.spinner("Thinking..."):
104
  vectorstore = create_vectorstore(raw_text)
105
  answer = get_answer(query, vectorstore, mode)
106
+ st.markdown("### Answer:")
107
  st.write(answer)
108
 
109
  if voice_enabled:
110
  audio_file = generate_voice(answer)
111
  with open(audio_file, "rb") as audio:
112
  st.audio(audio.read(), format="audio/mp3")
113
+ else:
114
+ if not uploaded_file.name.endswith((".jpg", ".jpeg", ".png")):
115
+ st.error("⚠️ Could not extract any text from the file.")
116
  else:
117
+ st.info("📁 Upload your syllabus file (PDF, DOCX, or image) to get started.")