MiakOnline commited on
Commit
a3fc155
·
verified ·
1 Parent(s): 7bd8007

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -114
app.py CHANGED
@@ -1,137 +1,121 @@
1
  # learning_with_fun_app.py
2
 
 
 
3
  import streamlit as st
4
- from langchain.vectorstores import FAISS
5
- from langchain.embeddings import HuggingFaceEmbeddings
 
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
- from langchain.docstore.document import Document
8
- from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, UnstructuredImageLoader
9
  from gtts import gTTS
10
- import os
11
- import tempfile
12
  import base64
13
- import requests
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- # -------------------------------
16
- # 1. Load documents
17
- # -------------------------------
 
18
  def load_documents(uploaded_files):
 
19
  docs = []
20
- for uploaded_file in uploaded_files:
21
- file_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
22
- with open(file_path, "wb") as f:
23
- f.write(uploaded_file.getbuffer())
24
-
25
- if uploaded_file.name.endswith(".pdf"):
26
- loader = PyPDFLoader(file_path)
27
- elif uploaded_file.name.endswith(".docx"):
28
- loader = Docx2txtLoader(file_path)
29
- elif uploaded_file.name.endswith(('.jpg', '.jpeg', '.png')):
30
- loader = UnstructuredImageLoader(file_path)
 
31
  else:
32
- st.warning("Unsupported file format")
33
  continue
34
-
35
  docs.extend(loader.load())
36
  return docs
37
 
38
- # -------------------------------
39
- # 2. Chunking documents
40
- # -------------------------------
41
- def chunk_documents(docs):
42
  splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
43
- return splitter.split_documents(docs)
44
-
45
- # -------------------------------
46
- # 3. Create embeddings
47
- # -------------------------------
48
- def create_embeddings():
49
- return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
50
-
51
- # -------------------------------
52
- # 4. Create and save FAISS index
53
- # -------------------------------
54
- def create_faiss_index(chunks, embedding):
55
- vectorstore = FAISS.from_documents(chunks, embedding)
56
- return vectorstore
57
-
58
- # -------------------------------
59
- # 5. Query vectorstore and generate answers
60
- # -------------------------------
61
- def query_faiss(vectorstore, question, embedding, grade, subject):
62
- relevant_docs = vectorstore.similarity_search(question, k=3)
63
- context = "\n".join([doc.page_content for doc in relevant_docs])
64
- return generate_answers(context, question)
65
-
66
- # -------------------------------
67
- # 6. Generate answer using GROQ + LLaMA 3
68
- # -------------------------------
69
- def generate_answers(context, question):
70
- prompt = f"""
71
- You are a helpful teacher. Use the context below to answer the question in two formats:
72
 
73
- 1. Beginner explanation (Urdu + English mix)
74
- 2. Storytelling style (Urdu-English mix with a fun tone)
 
75
 
 
 
 
 
 
 
 
 
76
  Context:
77
  {context}
78
 
79
  Question:
80
- {question}
81
 
82
- Answer:
 
 
83
  """
84
- headers = {"Authorization": f"Bearer YOUR_GROQ_API_KEY"}
85
- payload = {
86
- "model": "llama3-8b-8192",
87
- "messages": [{"role": "user", "content": prompt}],
88
- "temperature": 0.7
89
- }
90
- response = requests.post("https://api.groq.com/openai/v1/chat/completions", headers=headers, json=payload)
91
- result = response.json()
92
- return result["choices"][0]["message"]["content"]
93
-
94
- # -------------------------------
95
- # 7. Convert storytelling to voice
96
- # -------------------------------
97
- def text_to_speech(text):
98
- tts = gTTS(text=text, lang='ur')
99
- file_path = os.path.join(tempfile.gettempdir(), "story.mp3")
100
- tts.save(file_path)
101
- return file_path
102
-
103
- # -------------------------------
104
- # Streamlit UI
105
- # -------------------------------
106
- def main():
107
- st.set_page_config(page_title="Learning with Fun")
108
- st.title("๐Ÿ“š Learning with Fun - Ask Questions & Hear Stories!")
109
-
110
- grade = st.selectbox("Select Grade:", ["Grade 5", "Grade 6"])
111
- subject = st.selectbox("Select Subject:", ["Science", "Math", "English"])
112
-
113
- uploaded_files = st.file_uploader("Upload Book Files (PDF, DOCX, Images)", accept_multiple_files=True)
114
-
115
- question = st.text_input("Ask a Question (English or Urdu):")
116
-
117
- if st.button("Generate Answer") and uploaded_files and question:
118
- with st.spinner("Processing documents and generating answer..."):
119
- docs = load_documents(uploaded_files)
120
- chunks = chunk_documents(docs)
121
- embedding = create_embeddings()
122
- vectorstore = create_faiss_index(chunks, embedding)
123
- answer = query_faiss(vectorstore, question, embedding, grade, subject)
124
-
125
- st.markdown("### ๐Ÿ“˜ Answer:")
126
- parts = answer.split("2. Storytelling style")
127
- st.markdown(f"**1. Explanation:**\n\n{parts[0]}")
128
- if len(parts) > 1:
129
- story_text = parts[1].strip()
130
- st.markdown(f"**2. Storytelling Style:**\n\n{story_text}")
131
-
132
- audio_file = text_to_speech(story_text)
133
- audio_bytes = open(audio_file, 'rb').read()
134
- st.audio(audio_bytes, format='audio/mp3')
135
-
136
- if __name__ == "__main__":
137
- main()
 
1
  # learning_with_fun_app.py
2
 
3
+ import os
4
+ import tempfile
5
  import streamlit as st
6
+ from langchain_community.vectorstores import FAISS
7
+ from langchain_community.document_loaders import PyMuPDFLoader, Docx2txtLoader, UnstructuredImageLoader
8
+ from langchain_community.embeddings import HuggingFaceEmbeddings
9
  from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain.schema import Document
 
11
  from gtts import gTTS
 
 
12
  import base64
13
+ import shutil
14
+
15
# ----------------------------- UI SETUP --------------------------------------
st.set_page_config(page_title="Learning with Fun", layout="wide")
st.title("๐Ÿ“š Learning with Fun - Educational Q&A for Kids")

# ----------------------------- USER INPUT -----------------------------------
grade = st.selectbox("Select your Grade", ["Grade 5", "Grade 6"])
subject = st.selectbox("Select Subject", ["Science", "Math", "English"])

# "png" is accepted as well: UnstructuredImageLoader handles PNG and the
# previous revision of this app supported it.
uploaded_files = st.file_uploader(
    "Upload textbook files (PDF, DOCX, JPEG)",
    type=["pdf", "docx", "jpg", "jpeg", "png"],
    accept_multiple_files=True,
)
question = st.text_input("Ask your question in English or Urdu")

# ------------------------- SETUP TEMP FOLDER -------------------------------
# Fresh scratch directory for this script run (uploaded copies + generated
# audio); removed again in the CLEANUP section at the bottom of the file.
# NOTE(review): Streamlit re-executes the whole script on every interaction,
# so a new directory is created per run.
temp_dir = tempfile.mkdtemp()
28
+
29
+ # ------------------------- UTILITY FUNCTIONS -------------------------------
30
def load_documents(uploaded_files):
    """Persist each uploaded file under temp_dir and load it as LangChain Documents.

    Supported extensions: pdf (PyMuPDFLoader), docx (Docx2txtLoader), and
    jpg/jpeg/png (UnstructuredImageLoader).  Files with any other extension
    are skipped silently — the uploader widget already restricts the
    selectable types, so this branch is a safety net.

    Returns a flat list of Document objects from all loaders.
    """
    docs = []
    for file in uploaded_files:
        ext = file.name.split(".")[-1].lower()
        # Streamlit hands over in-memory UploadedFile objects; the loaders
        # need a real path on disk, so write a copy first.
        path = os.path.join(temp_dir, file.name)
        with open(path, "wb") as f:
            f.write(file.read())

        if ext == "pdf":
            loader = PyMuPDFLoader(path)
        elif ext == "docx":
            loader = Docx2txtLoader(path)
        elif ext in ("jpg", "jpeg", "png"):  # png restored from earlier revision
            loader = UnstructuredImageLoader(path)
        else:
            continue
        docs.extend(loader.load())
    return docs
49
 
50
def split_documents(documents):
    """Break the loaded documents into small overlapping chunks for embedding."""
    # 500-character chunks with a 50-character overlap keep neighbouring
    # context intact across chunk boundaries.
    return RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    ).split_documents(documents)
54
+
55
def create_vector_store(chunks):
    """Embed the chunks with a MiniLM sentence-transformer and index them in FAISS."""
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(chunks, embedder)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
def retrieve_docs(query, vector_store):
    """Return the chunks most similar to *query* from the FAISS store."""
    top_k = 3  # a small k keeps the downstream LLM prompt short
    return vector_store.similarity_search(query, k=top_k)
63
 
64
def query_llm_groq(context, query):
    """Ask GROQ's LLaMA 3 for a two-part answer grounded in *context*.

    Returns the raw model text, which is expected to contain a numbered
    explanation section and a storytelling section.

    Raises:
        RuntimeError: if the GROQ_API_KEY environment variable is unset,
            instead of failing later inside the SDK with an opaque auth error.
    """
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError("GROQ_API_KEY environment variable is not set")

    # Imported lazily so the app can still start (and other features work)
    # when the openai package is absent.  The redundant local `import os`
    # was removed — os is already imported at the top of the file.
    from openai import OpenAI

    # GROQ exposes an OpenAI-compatible endpoint, so the OpenAI SDK works
    # with only a base_url override.
    client = OpenAI(api_key=api_key, base_url="https://api.groq.com/openai/v1")

    prompt = f"""
Context:
{context}

Question:
{query}

Provide two outputs:
1. A simple, educational explanation in English + Urdu.
2. A creative storytelling version mixing English and Urdu.
"""
    response = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content
87
+
88
def generate_audio(text, lang='ur'):
    """Render *text* as speech via gTTS and return an inline <audio> HTML tag."""
    speech = gTTS(text, lang=lang)
    mp3_path = os.path.join(temp_dir, "response.mp3")
    speech.save(mp3_path)
    with open(mp3_path, "rb") as fh:
        encoded = base64.b64encode(fh.read()).decode()
    # Embed the mp3 directly in the page as a data URI so no static file
    # hosting is required.
    return f'<audio autoplay controls><source src="data:audio/mp3;base64,{encoded}" type="audio/mp3"></audio>'
98
+
99
# ----------------------------- MAIN LOGIC ----------------------------------
if question and uploaded_files:
    with st.spinner("Processing your documents..."):
        documents = load_documents(uploaded_files)
        chunks = split_documents(documents)
        vector_db = create_vector_store(chunks)

        results = retrieve_docs(question, vector_db)
        context_text = "\n".join(doc.page_content for doc in results)
        answer = query_llm_groq(context_text, question)

        st.markdown("### ๐Ÿ“˜ Answer")
        # Split on the FIRST "2." only: the model's answer may legitimately
        # contain further occurrences of "2." (lists, section references),
        # which previously produced >2 parts and silently dropped the
        # formatted explanation/storytelling layout.
        parts = answer.split("2.", 1)
        if len(parts) == 2:
            st.markdown(f"**Explanation:**\n{parts[0]}")
            st.markdown(f"**Storytelling:**\n{parts[1]}")
            # Narrate only the storytelling half.
            st.markdown(generate_audio(parts[1]), unsafe_allow_html=True)
        else:
            # Fallback: render the raw answer when no numbered split is found.
            st.markdown(answer)

# ----------------------------- CLEANUP --------------------------------------
# Remove this run's scratch directory (uploaded copies + generated mp3).
if os.path.exists(temp_dir):
    shutil.rmtree(temp_dir)