MiakOnline committed on
Commit
9468d43
·
verified ·
1 Parent(s): 7ec7528

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -33
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import streamlit as st
2
  from pypdf import PdfReader
3
  from docx import Document
 
4
  import requests
5
  import os
6
  import tempfile
@@ -12,33 +13,34 @@ from langchain_core.prompts import PromptTemplate
12
  from transformers import pipeline
13
  from langchain.llms import HuggingFacePipeline
14
 
15
- # Setup HuggingFace text generation pipeline (replace model with your choice)
16
  text_gen_pipeline = pipeline(
17
  "text-generation",
18
- model="gpt2", # lightweight model for demo, swap for bigger model as needed
19
- max_length=150
20
  )
21
  llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
22
 
23
- # Streamlit UI setup
24
  st.set_page_config(page_title="Learning with Fun", layout="wide")
25
  st.title("📘 Learning with Fun - Kids QA App")
26
  st.markdown("Ask questions from your syllabus! 📚")
27
 
28
- # Sidebar options
29
  grade = st.sidebar.selectbox("Select Grade", ["Grade 5", "Grade 6"])
30
  subject = st.sidebar.selectbox("Select Subject", ["Science", "Math", "Computer", "Islamiyat"])
31
  mode = st.sidebar.radio("Answer Format", ["🧠 Beginner Explanation", "📖 Storytelling"])
32
  voice_enabled = st.sidebar.checkbox("🔈 Enable Voice", value=True)
33
 
 
34
  def fetch_from_gdrive(link):
35
- file_id = None
36
  if "id=" in link:
37
  file_id = link.split("id=")[1]
38
  elif "/d/" in link:
39
  file_id = link.split("/d/")[1].split("/")[0]
40
- if not file_id:
41
  return None
 
42
  url = f"https://drive.google.com/uc?export=download&id={file_id}"
43
  response = requests.get(url)
44
  if response.status_code == 200:
@@ -58,33 +60,28 @@ if file_link:
58
  else:
59
  st.error("Invalid Google Drive link or download error.")
60
 
 
61
  def extract_text(file_path):
62
  text = ""
63
  if file_path.endswith(".pdf"):
64
  reader = PdfReader(file_path)
65
  for page in reader.pages:
66
- page_text = page.extract_text()
67
- if page_text:
68
- text += page_text
69
  elif file_path.endswith(".docx"):
70
  doc = Document(file_path)
71
  for para in doc.paragraphs:
72
  text += para.text + "\n"
73
- return text.strip()
74
 
 
75
  def create_vectorstore(text):
76
- if not text:
77
- return None
78
- splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
79
- docs = splitter.create_documents([text])
80
- # Filter out empty docs if any
81
- docs = [doc for doc in docs if doc.page_content.strip()]
82
- if not docs:
83
- return None
84
  embeddings = HuggingFaceEmbeddings()
85
  vectorstore = FAISS.from_documents(docs, embeddings)
86
  return vectorstore
87
 
 
88
  story_prompt = PromptTemplate.from_template(
89
  "ایک طالب علم نے سوال کیا: {question}\n"
90
  "نصاب کی معلومات: {context}\n"
@@ -97,39 +94,47 @@ explain_prompt = PromptTemplate.from_template(
97
  "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
98
  )
99
 
 
100
  def generate_voice(text, lang='ur'):
101
  tts = gTTS(text, lang=lang)
102
  tts_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
103
  tts.save(tts_file.name)
104
  return tts_file.name
105
 
 
106
  def get_answer(query, vectorstore, mode):
107
  retriever = vectorstore.as_retriever()
108
  docs = retriever.get_relevant_documents(query)
109
  context = "\n".join([doc.page_content for doc in docs])
110
- prompt = story_prompt.format(question=query, context=context) if mode == "📖 Storytelling" else explain_prompt.format(question=query, context=context)
 
 
 
 
 
111
  answer = llm.invoke(prompt)
112
  return answer
113
 
 
114
  if uploaded_file:
115
  raw_text = extract_text(uploaded_file)
116
- if not raw_text:
117
- st.error("No text could be extracted from the syllabus file. Please check the file.")
118
  else:
119
  st.success("📄 Syllabus loaded successfully!")
 
120
  query = st.text_input("❓ Ask your question (Urdu or English)")
121
  if query:
122
  with st.spinner("Thinking..."):
123
  vs = create_vectorstore(raw_text)
124
- if vs is None:
125
- st.error(" Failed to create vector store. The syllabus content may be too short or invalid.")
126
- else:
127
- answer = get_answer(query, vs, mode)
128
- st.markdown("### ✅ Answer:")
129
- st.write(answer)
130
- if voice_enabled:
131
- audio_file = generate_voice(answer)
132
- with open(audio_file, "rb") as f:
133
- st.audio(f.read(), format="audio/mp3")
134
  else:
135
- st.info("Please paste a valid Google Drive link to load your syllabus file.")
 
1
  import streamlit as st
2
  from pypdf import PdfReader
3
  from docx import Document
4
+ from PIL import Image
5
  import requests
6
  import os
7
  import tempfile
 
13
  from transformers import pipeline
14
  from langchain.llms import HuggingFacePipeline
15
 
16
+ # Setup HuggingFace pipeline with distilgpt2 (small, CPU-friendly)
17
  text_gen_pipeline = pipeline(
18
  "text-generation",
19
+ model="distilgpt2",
20
+ device=-1 # CPU only
21
  )
22
  llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
23
 
24
+ # App UI
25
  st.set_page_config(page_title="Learning with Fun", layout="wide")
26
  st.title("📘 Learning with Fun - Kids QA App")
27
  st.markdown("Ask questions from your syllabus! 📚")
28
 
29
+ # Sidebar
30
  grade = st.sidebar.selectbox("Select Grade", ["Grade 5", "Grade 6"])
31
  subject = st.sidebar.selectbox("Select Subject", ["Science", "Math", "Computer", "Islamiyat"])
32
  mode = st.sidebar.radio("Answer Format", ["🧠 Beginner Explanation", "📖 Storytelling"])
33
  voice_enabled = st.sidebar.checkbox("🔈 Enable Voice", value=True)
34
 
35
+ # Google Drive PDF/DOC support
36
  def fetch_from_gdrive(link):
 
37
  if "id=" in link:
38
  file_id = link.split("id=")[1]
39
  elif "/d/" in link:
40
  file_id = link.split("/d/")[1].split("/")[0]
41
+ else:
42
  return None
43
+
44
  url = f"https://drive.google.com/uc?export=download&id={file_id}"
45
  response = requests.get(url)
46
  if response.status_code == 200:
 
60
  else:
61
  st.error("Invalid Google Drive link or download error.")
62
 
63
+ # Extract text from PDF or DOCX
64
  def extract_text(file_path):
65
  text = ""
66
  if file_path.endswith(".pdf"):
67
  reader = PdfReader(file_path)
68
  for page in reader.pages:
69
+ text += page.extract_text() or ""
 
 
70
  elif file_path.endswith(".docx"):
71
  doc = Document(file_path)
72
  for para in doc.paragraphs:
73
  text += para.text + "\n"
74
+ return text
75
 
76
+ # Create vectorstore from syllabus text
77
  def create_vectorstore(text):
78
+ text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
79
+ docs = text_splitter.create_documents([text])
 
 
 
 
 
 
80
  embeddings = HuggingFaceEmbeddings()
81
  vectorstore = FAISS.from_documents(docs, embeddings)
82
  return vectorstore
83
 
84
+ # Prompt templates
85
  story_prompt = PromptTemplate.from_template(
86
  "ایک طالب علم نے سوال کیا: {question}\n"
87
  "نصاب کی معلومات: {context}\n"
 
94
  "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
95
  )
96
 
97
+ # Generate voice from text
98
  def generate_voice(text, lang='ur'):
99
  tts = gTTS(text, lang=lang)
100
  tts_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
101
  tts.save(tts_file.name)
102
  return tts_file.name
103
 
104
+ # Get answer using LLM and vectorstore
105
  def get_answer(query, vectorstore, mode):
106
  retriever = vectorstore.as_retriever()
107
  docs = retriever.get_relevant_documents(query)
108
  context = "\n".join([doc.page_content for doc in docs])
109
+
110
+ if mode == "📖 Storytelling":
111
+ prompt = story_prompt.format(question=query, context=context)
112
+ else:
113
+ prompt = explain_prompt.format(question=query, context=context)
114
+
115
  answer = llm.invoke(prompt)
116
  return answer
117
 
118
+ # Main app logic
119
  if uploaded_file:
120
  raw_text = extract_text(uploaded_file)
121
+ if not raw_text.strip():
122
+ st.error("No text extracted from the file. Please check the file content.")
123
  else:
124
  st.success("📄 Syllabus loaded successfully!")
125
+
126
  query = st.text_input("❓ Ask your question (Urdu or English)")
127
  if query:
128
  with st.spinner("Thinking..."):
129
  vs = create_vectorstore(raw_text)
130
+ answer = get_answer(query, vs, mode)
131
+ st.markdown("### Answer:")
132
+ st.write(answer)
133
+
134
+ if voice_enabled:
135
+ audio_file = generate_voice(answer)
136
+ with open(audio_file, "rb") as audio:
137
+ audio_bytes = audio.read()
138
+ st.audio(audio_bytes, format="audio/mp3")
 
139
  else:
140
+ st.info("Please paste a Google Drive link to your syllabus file (.pdf or .docx) above.")