MiakOnline committed on
Commit
190d269
·
verified ·
1 Parent(s): 146c00e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -30
app.py CHANGED
@@ -1,19 +1,22 @@
1
  import streamlit as st
2
  from pypdf import PdfReader
3
  from docx import Document
4
- from PIL import Image
5
- import requests
6
- import os
7
  import tempfile
 
8
  from gtts import gTTS
 
 
 
9
  from langchain.vectorstores import FAISS
10
  from langchain.embeddings import HuggingFaceEmbeddings
11
  from langchain.text_splitter import CharacterTextSplitter
12
- from langchain_core.prompts import PromptTemplate
13
- from transformers import pipeline
14
  from langchain.llms import HuggingFacePipeline
15
 
16
- # Setup HuggingFace pipeline with distilgpt2 (small, CPU-friendly)
 
 
 
17
  text_gen_pipeline = pipeline(
18
  "text-generation",
19
  model="distilgpt2",
@@ -21,24 +24,25 @@ text_gen_pipeline = pipeline(
21
  )
22
  llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
23
 
24
- # App UI
25
  st.set_page_config(page_title="Learning with Fun", layout="wide")
26
  st.title("📘 Learning with Fun - Kids QA App")
27
  st.markdown("Ask questions from your syllabus! 📚")
28
 
29
- # Sidebar
30
  grade = st.sidebar.selectbox("Select Grade", ["Grade 5", "Grade 6"])
31
  subject = st.sidebar.selectbox("Select Subject", ["Science", "Math", "Computer", "Islamiyat"])
32
  mode = st.sidebar.radio("Answer Format", ["🧠 Beginner Explanation", "📖 Storytelling"])
33
  voice_enabled = st.sidebar.checkbox("🔈 Enable Voice", value=True)
34
 
35
- # Google Drive PDF/DOC support
36
- def fetch_from_gdrive(link):
 
37
  if "id=" in link:
38
- file_id = link.split("id=")[1]
39
  elif "/d/" in link:
40
  file_id = link.split("/d/")[1].split("/")[0]
41
- else:
42
  return None
43
 
44
  url = f"https://drive.google.com/uc?export=download&id={file_id}"
@@ -60,23 +64,27 @@ if file_link:
60
  else:
61
  st.error("Invalid Google Drive link or download error.")
62
 
63
- # Extract text from PDF or DOCX
64
- def extract_text(file_path):
65
  text = ""
66
  if file_path.endswith(".pdf"):
67
  reader = PdfReader(file_path)
68
  for page in reader.pages:
69
- text += page.extract_text() or ""
 
 
70
  elif file_path.endswith(".docx"):
71
  doc = Document(file_path)
72
  for para in doc.paragraphs:
73
  text += para.text + "\n"
 
 
74
  return text
75
 
76
- # Create vectorstore from syllabus text
77
- def create_vectorstore(text):
78
- text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
79
- docs = text_splitter.create_documents([text])
80
  embeddings = HuggingFaceEmbeddings()
81
  vectorstore = FAISS.from_documents(docs, embeddings)
82
  return vectorstore
@@ -94,15 +102,15 @@ explain_prompt = PromptTemplate.from_template(
94
  "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
95
  )
96
 
97
- # Generate voice from text
98
- def generate_voice(text, lang='ur'):
99
- tts = gTTS(text, lang=lang)
100
  tts_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
101
  tts.save(tts_file.name)
102
  return tts_file.name
103
 
104
- # Get answer using LLM and vectorstore
105
- def get_answer(query, vectorstore, mode):
106
  retriever = vectorstore.as_retriever()
107
  docs = retriever.get_relevant_documents(query)
108
  context = "\n".join([doc.page_content for doc in docs])
@@ -115,26 +123,24 @@ def get_answer(query, vectorstore, mode):
115
  answer = llm.invoke(prompt)
116
  return answer
117
 
118
- # Main app logic
119
  if uploaded_file:
120
  raw_text = extract_text(uploaded_file)
121
  if not raw_text.strip():
122
  st.error("No text extracted from the file. Please check the file content.")
123
  else:
124
  st.success("📄 Syllabus loaded successfully!")
125
-
126
  query = st.text_input("❓ Ask your question (Urdu or English)")
127
  if query:
128
  with st.spinner("Thinking..."):
129
- vs = create_vectorstore(raw_text)
130
- answer = get_answer(query, vs, mode)
131
  st.markdown("### ✅ Answer:")
132
  st.write(answer)
133
 
134
  if voice_enabled:
135
  audio_file = generate_voice(answer)
136
  with open(audio_file, "rb") as audio:
137
- audio_bytes = audio.read()
138
- st.audio(audio_bytes, format="audio/mp3")
139
  else:
140
  st.info("Please paste a Google Drive link to your syllabus file (.pdf or .docx) above.")
 
1
  import streamlit as st
2
  from pypdf import PdfReader
3
  from docx import Document
 
 
 
4
  import tempfile
5
+ import requests
6
  from gtts import gTTS
7
+
8
+ from PIL import Image
9
+
10
  from langchain.vectorstores import FAISS
11
  from langchain.embeddings import HuggingFaceEmbeddings
12
  from langchain.text_splitter import CharacterTextSplitter
13
+ from langchain.prompts import PromptTemplate
 
14
  from langchain.llms import HuggingFacePipeline
15
 
16
+ from transformers import pipeline
17
+
18
+
19
+ # Setup HuggingFace pipeline with distilgpt2 (CPU)
20
  text_gen_pipeline = pipeline(
21
  "text-generation",
22
  model="distilgpt2",
 
24
  )
25
  llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
26
 
27
+ # Streamlit app config
28
  st.set_page_config(page_title="Learning with Fun", layout="wide")
29
  st.title("📘 Learning with Fun - Kids QA App")
30
  st.markdown("Ask questions from your syllabus! 📚")
31
 
32
+ # Sidebar widgets
33
  grade = st.sidebar.selectbox("Select Grade", ["Grade 5", "Grade 6"])
34
  subject = st.sidebar.selectbox("Select Subject", ["Science", "Math", "Computer", "Islamiyat"])
35
  mode = st.sidebar.radio("Answer Format", ["🧠 Beginner Explanation", "📖 Storytelling"])
36
  voice_enabled = st.sidebar.checkbox("🔈 Enable Voice", value=True)
37
 
38
+ # Fetch syllabus file from Google Drive link
39
+ def fetch_from_gdrive(link: str) -> str | None:
40
+ file_id = None
41
  if "id=" in link:
42
+ file_id = link.split("id=")[1].split("&")[0]
43
  elif "/d/" in link:
44
  file_id = link.split("/d/")[1].split("/")[0]
45
+ if not file_id:
46
  return None
47
 
48
  url = f"https://drive.google.com/uc?export=download&id={file_id}"
 
64
  else:
65
  st.error("Invalid Google Drive link or download error.")
66
 
67
+ # Extract text content from uploaded file
68
+ def extract_text(file_path: str) -> str:
69
  text = ""
70
  if file_path.endswith(".pdf"):
71
  reader = PdfReader(file_path)
72
  for page in reader.pages:
73
+ page_text = page.extract_text()
74
+ if page_text:
75
+ text += page_text
76
  elif file_path.endswith(".docx"):
77
  doc = Document(file_path)
78
  for para in doc.paragraphs:
79
  text += para.text + "\n"
80
+ else:
81
+ st.error("Unsupported file format. Please upload a PDF or DOCX file.")
82
  return text
83
 
84
+ # Create vector store for similarity search
85
+ def create_vectorstore(text: str) -> FAISS:
86
+ splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
87
+ docs = splitter.create_documents([text])
88
  embeddings = HuggingFaceEmbeddings()
89
  vectorstore = FAISS.from_documents(docs, embeddings)
90
  return vectorstore
 
102
  "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
103
  )
104
 
105
+ # Generate speech audio from text
106
+ def generate_voice(text: str, lang='ur') -> str:
107
+ tts = gTTS(text=text, lang=lang)
108
  tts_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
109
  tts.save(tts_file.name)
110
  return tts_file.name
111
 
112
+ # Generate answer using vectorstore context and LLM
113
+ def get_answer(query: str, vectorstore: FAISS, mode: str) -> str:
114
  retriever = vectorstore.as_retriever()
115
  docs = retriever.get_relevant_documents(query)
116
  context = "\n".join([doc.page_content for doc in docs])
 
123
  answer = llm.invoke(prompt)
124
  return answer
125
 
126
+ # Main app flow
127
  if uploaded_file:
128
  raw_text = extract_text(uploaded_file)
129
  if not raw_text.strip():
130
  st.error("No text extracted from the file. Please check the file content.")
131
  else:
132
  st.success("📄 Syllabus loaded successfully!")
 
133
  query = st.text_input("❓ Ask your question (Urdu or English)")
134
  if query:
135
  with st.spinner("Thinking..."):
136
+ vectorstore = create_vectorstore(raw_text)
137
+ answer = get_answer(query, vectorstore, mode)
138
  st.markdown("### ✅ Answer:")
139
  st.write(answer)
140
 
141
  if voice_enabled:
142
  audio_file = generate_voice(answer)
143
  with open(audio_file, "rb") as audio:
144
+ st.audio(audio.read(), format="audio/mp3")
 
145
  else:
146
  st.info("Please paste a Google Drive link to your syllabus file (.pdf or .docx) above.")