MiakOnline committed on
Commit
4507d2c
·
verified ·
1 Parent(s): 78733f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -34
app.py CHANGED
@@ -1,45 +1,46 @@
1
  import streamlit as st
2
  from pypdf import PdfReader
3
  from docx import Document
4
- from PIL import Image
5
  import requests
6
  import os
7
  import tempfile
8
- import base64
9
  from gtts import gTTS
10
  from langchain.vectorstores import FAISS
11
  from langchain.embeddings import HuggingFaceEmbeddings
12
  from langchain.text_splitter import CharacterTextSplitter
13
- from langchain_core.documents import Document as LCDocument
14
- from langchain_core.runnables import RunnableLambda, RunnablePassthrough
15
  from langchain_core.prompts import PromptTemplate
16
- from langchain_community.llms import Groq
17
  from langchain.chains import RetrievalQA
 
18
 
19
- # GROQ API setup
20
- groq_api_key = st.secrets["GROQ_API_KEY"] if "GROQ_API_KEY" in st.secrets else os.getenv("GROQ_API_KEY")
21
- llm = Groq(temperature=0.3, model_name="llama3-8b-8192", groq_api_key=groq_api_key)
 
 
 
 
22
 
23
  # App UI
24
  st.set_page_config(page_title="Learning with Fun", layout="wide")
25
  st.title("📘 Learning with Fun - Kids QA App")
26
  st.markdown("Ask questions from your syllabus! 📚")
27
 
28
- # Sidebar
29
  grade = st.sidebar.selectbox("Select Grade", ["Grade 5", "Grade 6"])
30
  subject = st.sidebar.selectbox("Select Subject", ["Science", "Math", "Computer", "Islamiyat"])
31
  mode = st.sidebar.radio("Answer Format", ["🧠 Beginner Explanation", "📖 Storytelling"])
32
  voice_enabled = st.sidebar.checkbox("🔈 Enable Voice", value=True)
33
 
34
- # Google Drive PDF/DOC support
35
  def fetch_from_gdrive(link):
 
36
  if "id=" in link:
37
  file_id = link.split("id=")[1]
38
  elif "/d/" in link:
39
  file_id = link.split("/d/")[1].split("/")[0]
40
- else:
41
  return None
42
-
43
  url = f"https://drive.google.com/uc?export=download&id={file_id}"
44
  response = requests.get(url)
45
  if response.status_code == 200:
@@ -59,7 +60,7 @@ if file_link:
59
  else:
60
  st.error("Invalid Google Drive link or download error.")
61
 
62
- # Extract text
63
  def extract_text(file_path):
64
  text = ""
65
  if file_path.endswith(".pdf"):
@@ -74,53 +75,48 @@ def extract_text(file_path):
74
  text += para.text + "\n"
75
  return text
76
 
77
- # Embeddings + Vectorstore
78
  def create_vectorstore(text):
79
- text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
80
- docs = text_splitter.create_documents([text])
81
  embeddings = HuggingFaceEmbeddings()
82
  vectorstore = FAISS.from_documents(docs, embeddings)
83
  return vectorstore
84
 
85
- # Prompt templates
86
  story_prompt = PromptTemplate.from_template(
87
  "ایک طالب علم نے سوال کیا: {question}\n"
88
  "نصاب کی معلومات: {context}\n"
89
  "برائے مہربانی ایک دلچسپ کہانی کی صورت میں بچے کو اردو میں جواب دیں۔"
90
  )
91
-
92
  explain_prompt = PromptTemplate.from_template(
93
  "سوال: {question}\n"
94
  "نصاب کا سیاق و سباق: {context}\n"
95
  "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
96
  )
97
 
98
- # TTS
99
  def generate_voice(text, lang='ur'):
100
  tts = gTTS(text, lang=lang)
101
  tts_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
102
  tts.save(tts_file.name)
103
  return tts_file.name
104
 
105
- # QA pipeline
106
  def get_answer(query, vectorstore, mode):
107
  retriever = vectorstore.as_retriever()
108
  docs = retriever.get_relevant_documents(query)
109
  context = "\n".join([doc.page_content for doc in docs])
 
 
 
 
 
110
 
111
- if mode == "📖 Storytelling":
112
- prompt = story_prompt.format(question=query, context=context)
113
- else:
114
- prompt = explain_prompt.format(question=query, context=context)
115
-
116
- answer = llm.invoke(prompt)
117
- return answer
118
-
119
- # Main logic
120
  if uploaded_file:
121
  raw_text = extract_text(uploaded_file)
122
  st.success("📄 Syllabus loaded successfully!")
123
-
124
  query = st.text_input("❓ Ask your question (Urdu or English)")
125
  if query:
126
  with st.spinner("Thinking..."):
@@ -128,11 +124,9 @@ if uploaded_file:
128
  answer = get_answer(query, vs, mode)
129
  st.markdown("### ✅ Answer:")
130
  st.write(answer)
131
-
132
  if voice_enabled:
133
  audio_file = generate_voice(answer)
134
- with open(audio_file, "rb") as audio:
135
- audio_bytes = audio.read()
136
- st.audio(audio_bytes, format="audio/mp3")
137
  else:
138
  st.info("Please paste a valid Google Drive link to load your syllabus file.")
 
1
  import streamlit as st
2
  from pypdf import PdfReader
3
  from docx import Document
 
4
  import requests
5
  import os
6
  import tempfile
 
7
  from gtts import gTTS
8
  from langchain.vectorstores import FAISS
9
  from langchain.embeddings import HuggingFaceEmbeddings
10
  from langchain.text_splitter import CharacterTextSplitter
 
 
11
  from langchain_core.prompts import PromptTemplate
12
+ from transformers import pipeline
13
  from langchain.chains import RetrievalQA
14
+ from langchain.llms import HuggingFacePipeline
15
 
16
# Set up the HuggingFace text-generation pipeline that backs the app's LLM.
# gpt2 is a small demo model; replace it with your preferred model.
text_gen_pipeline = pipeline(
    "text-generation",
    model="gpt2",
    # Limit only the newly generated tokens. `max_length` also counts the
    # prompt, and the retrieval prompt (question plus retrieved syllabus
    # chunks) can easily exceed 150 tokens on its own, which would leave no
    # room for an answer.
    max_new_tokens=150,
)
# Wrap the transformers pipeline so it can be called through LangChain.
llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
23
 
24
  # App UI
25
  st.set_page_config(page_title="Learning with Fun", layout="wide")
26
  st.title("📘 Learning with Fun - Kids QA App")
27
  st.markdown("Ask questions from your syllabus! 📚")
28
 
29
# Sidebar controls: grade, subject, answer format and voice toggle.
grade = st.sidebar.selectbox(
    label="Select Grade",
    options=["Grade 5", "Grade 6"],
)
subject = st.sidebar.selectbox(
    label="Select Subject",
    options=["Science", "Math", "Computer", "Islamiyat"],
)
mode = st.sidebar.radio(
    label="Answer Format",
    options=["🧠 Beginner Explanation", "📖 Storytelling"],
)
voice_enabled = st.sidebar.checkbox(label="🔈 Enable Voice", value=True)
34
 
35
+ # Fetch file from Google Drive
36
  def fetch_from_gdrive(link):
37
+ file_id = None
38
  if "id=" in link:
39
  file_id = link.split("id=")[1]
40
  elif "/d/" in link:
41
  file_id = link.split("/d/")[1].split("/")[0]
42
+ if not file_id:
43
  return None
 
44
  url = f"https://drive.google.com/uc?export=download&id={file_id}"
45
  response = requests.get(url)
46
  if response.status_code == 200:
 
60
  else:
61
  st.error("Invalid Google Drive link or download error.")
62
 
63
+ # Extract text from PDF or DOCX
64
  def extract_text(file_path):
65
  text = ""
66
  if file_path.endswith(".pdf"):
 
75
  text += para.text + "\n"
76
  return text
77
 
78
# Create vectorstore for retrieval
def create_vectorstore(text):
    """Split syllabus text into chunks and index them in a FAISS store.

    Args:
        text: Plain text of the syllabus to index.

    Returns:
        The FAISS vector store built from the chunks using
        HuggingFaceEmbeddings.

    Raises:
        ValueError: If ``text`` is empty or contains only whitespace, since
            there would be nothing to index.
    """
    if not text or not text.strip():
        raise ValueError("Cannot build a vectorstore from empty text.")

    # Split on single newlines. The splitter's default separator is a blank
    # line ("\n\n"), but the text handed in here is joined with single "\n",
    # so the default would leave chunks far larger than chunk_size.
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=500,
        chunk_overlap=50,
    )
    docs = splitter.create_documents([text])
    embeddings = HuggingFaceEmbeddings()
    return FAISS.from_documents(docs, embeddings)
85
 
86
# Prompts: one template per answer format. Both take {question} and {context}.
story_prompt = PromptTemplate.from_template(
    "\n".join(
        (
            "ایک طالب علم نے سوال کیا: {question}",
            "نصاب کی معلومات: {context}",
            "برائے مہربانی ایک دلچسپ کہانی کی صورت میں بچے کو اردو میں جواب دیں۔",
        )
    )
)
explain_prompt = PromptTemplate.from_template(
    "\n".join(
        (
            "سوال: {question}",
            "نصاب کا سیاق و سباق: {context}",
            "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔",
        )
    )
)
97
 
98
# Generate voice from text
def generate_voice(text, lang='ur'):
    """Synthesize ``text`` to speech with gTTS and save it as an MP3 file.

    Args:
        text: The text to read aloud.
        lang: Language code passed to gTTS (defaults to ``'ur'``).

    Returns:
        Path of the temporary ``.mp3`` file. The file is created with
        ``delete=False``, so it stays on disk until the caller removes it.
    """
    tts = gTTS(text, lang=lang)
    # Reserve a unique path and close the handle before gTTS writes to it.
    # This avoids leaking an open descriptor on every call and keeps the
    # file from being held open while it is reopened by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tts_file:
        audio_path = tts_file.name
    tts.save(audio_path)
    return audio_path
104
 
105
# Get answer using LLM
def get_answer(query, vectorstore, mode):
    """Answer ``query`` from the syllabus in the selected answer format.

    Args:
        query: The user's question.
        vectorstore: Store whose retriever supplies the relevant chunks.
        mode: Answer format label. "📖 Storytelling" selects the story
            prompt; any other value selects the explanation prompt.

    Returns:
        The result of ``llm.invoke`` for the formatted prompt.
    """
    # Retrieve the matching chunks and merge them into one context string.
    matches = vectorstore.as_retriever().get_relevant_documents(query)
    context = "\n".join(match.page_content for match in matches)

    if mode == "📖 Storytelling":
        template = story_prompt
    else:
        template = explain_prompt

    return llm.invoke(template.format(question=query, context=context))
115
 
116
+ # Main app flow
 
 
 
 
 
 
 
 
117
  if uploaded_file:
118
  raw_text = extract_text(uploaded_file)
119
  st.success("📄 Syllabus loaded successfully!")
 
120
  query = st.text_input("❓ Ask your question (Urdu or English)")
121
  if query:
122
  with st.spinner("Thinking..."):
 
124
  answer = get_answer(query, vs, mode)
125
  st.markdown("### ✅ Answer:")
126
  st.write(answer)
 
127
  if voice_enabled:
128
  audio_file = generate_voice(answer)
129
+ with open(audio_file, "rb") as f:
130
+ st.audio(f.read(), format="audio/mp3")
 
131
  else:
132
  st.info("Please paste a valid Google Drive link to load your syllabus file.")