MiakOnline committed on
Commit
f82cc7b
·
verified ·
1 Parent(s): 260a142

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -17
app.py CHANGED
@@ -3,53 +3,80 @@ from pypdf import PdfReader
3
  from docx import Document
4
  import tempfile
5
  from gtts import gTTS
 
6
  from PIL import Image
7
- from langchain_community.vectorstores import FAISS
 
8
  from langchain.embeddings import HuggingFaceEmbeddings
9
  from langchain.text_splitter import CharacterTextSplitter
10
  from langchain.prompts import PromptTemplate
11
  from langchain.llms import HuggingFacePipeline
 
12
  from transformers import pipeline
13
 
 
14
  # Setup HuggingFace pipeline with distilgpt2 (CPU)
15
- text_gen_pipeline = pipeline("text-generation", model="distilgpt2", device=-1)
 
 
 
 
16
  llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
17
 
 
18
  st.set_page_config(page_title="Learning with Fun", layout="wide")
19
  st.title("📘 Learning with Fun - Kids QA App")
20
  st.markdown("Ask questions from your syllabus! 📚")
21
 
 
22
  grade = st.sidebar.selectbox("Select Grade", ["Grade 5", "Grade 6"])
23
  subject = st.sidebar.selectbox("Select Subject", ["Science", "Math", "Computer", "Islamiyat"])
24
  mode = st.sidebar.radio("Answer Format", ["🧠 Beginner Explanation", "📖 Storytelling"])
25
  voice_enabled = st.sidebar.checkbox("🔈 Enable Voice", value=True)
26
 
 
27
  uploaded_file = st.file_uploader(
28
  "Upload your syllabus file (PDF, DOCX, JPEG, PNG, JPG)",
29
  type=["pdf", "docx", "jpeg", "png", "jpg"]
30
  )
31
 
 
32
  def extract_text_from_uploaded(file) -> str:
33
  text = ""
34
  if file is None:
35
  return text
36
 
37
  if file.type == "application/pdf":
38
- reader = PdfReader(file)
39
- for page in reader.pages:
40
- page_text = page.extract_text()
41
- if page_text:
42
- text += page_text
 
 
 
43
  elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
44
- doc = Document(file)
45
- for para in doc.paragraphs:
46
- text += para.text + "\n"
 
 
 
47
  elif file.type in ["image/jpeg", "image/png"]:
48
  st.warning("Image files currently are not supported for text extraction.")
49
  else:
50
  st.error("Unsupported file format.")
51
  return text
52
 
 
 
 
 
 
 
 
 
 
53
  story_prompt = PromptTemplate.from_template(
54
  "ایک طالب علم نے سوال کیا: {question}\n"
55
  "نصاب کی معلومات: {context}\n"
@@ -62,19 +89,14 @@ explain_prompt = PromptTemplate.from_template(
62
  "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
63
  )
64
 
 
65
  def generate_voice(text: str, lang='ur') -> str:
66
  tts = gTTS(text=text, lang=lang)
67
  tts_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
68
  tts.save(tts_file.name)
69
  return tts_file.name
70
 
71
- def create_vectorstore(text: str) -> FAISS:
72
- splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
73
- docs = splitter.create_documents([text])
74
- embeddings = HuggingFaceEmbeddings()
75
- vectorstore = FAISS.from_documents(docs, embeddings)
76
- return vectorstore
77
-
78
  def get_answer(query: str, vectorstore: FAISS, mode: str) -> str:
79
  retriever = vectorstore.as_retriever()
80
  docs = retriever.get_relevant_documents(query)
@@ -88,6 +110,7 @@ def get_answer(query: str, vectorstore: FAISS, mode: str) -> str:
88
  answer = llm.invoke(prompt)
89
  return answer
90
 
 
91
  if uploaded_file:
92
  raw_text = extract_text_from_uploaded(uploaded_file)
93
  if not raw_text.strip():
 
3
  from docx import Document
4
  import tempfile
5
  from gtts import gTTS
6
+
7
  from PIL import Image
8
+
9
+ from langchain.vectorstores import FAISS
10
  from langchain.embeddings import HuggingFaceEmbeddings
11
  from langchain.text_splitter import CharacterTextSplitter
12
  from langchain.prompts import PromptTemplate
13
  from langchain.llms import HuggingFacePipeline
14
+
15
  from transformers import pipeline
16
 
17
+
18
  # Setup HuggingFace pipeline with distilgpt2 (CPU)
19
+ text_gen_pipeline = pipeline(
20
+ "text-generation",
21
+ model="distilgpt2",
22
+ device=-1 # CPU only
23
+ )
24
  llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
25
 
26
+ # Streamlit app config
27
  st.set_page_config(page_title="Learning with Fun", layout="wide")
28
  st.title("📘 Learning with Fun - Kids QA App")
29
  st.markdown("Ask questions from your syllabus! 📚")
30
 
31
+ # Sidebar widgets
32
  grade = st.sidebar.selectbox("Select Grade", ["Grade 5", "Grade 6"])
33
  subject = st.sidebar.selectbox("Select Subject", ["Science", "Math", "Computer", "Islamiyat"])
34
  mode = st.sidebar.radio("Answer Format", ["🧠 Beginner Explanation", "📖 Storytelling"])
35
  voice_enabled = st.sidebar.checkbox("🔈 Enable Voice", value=True)
36
 
37
+ # File uploader for syllabus
38
  uploaded_file = st.file_uploader(
39
  "Upload your syllabus file (PDF, DOCX, JPEG, PNG, JPG)",
40
  type=["pdf", "docx", "jpeg", "png", "jpg"]
41
  )
42
 
43
+ # Extract text content from uploaded file directly
44
  def extract_text_from_uploaded(file) -> str:
45
  text = ""
46
  if file is None:
47
  return text
48
 
49
  if file.type == "application/pdf":
50
+ try:
51
+ reader = PdfReader(file)
52
+ for page in reader.pages:
53
+ page_text = page.extract_text()
54
+ if page_text:
55
+ text += page_text
56
+ except Exception as e:
57
+ st.error(f"Error reading PDF file: {e}")
58
  elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
59
+ try:
60
+ doc = Document(file)
61
+ for para in doc.paragraphs:
62
+ text += para.text + "\n"
63
+ except Exception as e:
64
+ st.error(f"Error reading DOCX file: {e}")
65
  elif file.type in ["image/jpeg", "image/png"]:
66
  st.warning("Image files currently are not supported for text extraction.")
67
  else:
68
  st.error("Unsupported file format.")
69
  return text
70
 
71
+ # Create vector store for similarity search
72
+ def create_vectorstore(text: str) -> FAISS:
73
+ splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
74
+ docs = splitter.create_documents([text])
75
+ embeddings = HuggingFaceEmbeddings()
76
+ vectorstore = FAISS.from_documents(docs, embeddings)
77
+ return vectorstore
78
+
79
+ # Prompt templates
80
  story_prompt = PromptTemplate.from_template(
81
  "ایک طالب علم نے سوال کیا: {question}\n"
82
  "نصاب کی معلومات: {context}\n"
 
89
  "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
90
  )
91
 
92
+ # Generate speech audio from text
93
  def generate_voice(text: str, lang='ur') -> str:
94
  tts = gTTS(text=text, lang=lang)
95
  tts_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
96
  tts.save(tts_file.name)
97
  return tts_file.name
98
 
99
+ # Generate answer using vectorstore context and LLM
 
 
 
 
 
 
100
  def get_answer(query: str, vectorstore: FAISS, mode: str) -> str:
101
  retriever = vectorstore.as_retriever()
102
  docs = retriever.get_relevant_documents(query)
 
110
  answer = llm.invoke(prompt)
111
  return answer
112
 
113
+ # Main app flow
114
  if uploaded_file:
115
  raw_text = extract_text_from_uploaded(uploaded_file)
116
  if not raw_text.strip():