MiakOnline commited on
Commit
e57b753
·
verified ·
1 Parent(s): c97c3ca

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -0
app.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # learning_with_fun_app.py
2
+
3
+ import streamlit as st
4
+ from langchain.vectorstores import FAISS
5
+ from langchain.embeddings import HuggingFaceEmbeddings
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.docstore.document import Document
8
+ from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, UnstructuredImageLoader
9
+ from gtts import gTTS
10
+ import os
11
+ import tempfile
12
+ import base64
13
+ import requests
14
+
15
+ # -------------------------------
16
+ # 1. Load documents
17
+ # -------------------------------
18
def load_documents(uploaded_files):
    """Persist uploaded files to the temp dir and load them as LangChain documents.

    Args:
        uploaded_files: Iterable of Streamlit UploadedFile objects.

    Returns:
        list: Documents extracted from every supported file. Unsupported
        formats are skipped with a Streamlit warning rather than aborting.
    """
    docs = []
    for uploaded_file in uploaded_files:
        # The loaders need a real filesystem path, so spill the upload to disk.
        file_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        # Compare extensions case-insensitively so e.g. "BOOK.PDF" is accepted.
        name = uploaded_file.name.lower()
        if name.endswith(".pdf"):
            loader = PyPDFLoader(file_path)
        elif name.endswith(".docx"):
            loader = Docx2txtLoader(file_path)
        elif name.endswith((".jpg", ".jpeg", ".png")):
            loader = UnstructuredImageLoader(file_path)
        else:
            # Name the file so the user knows which upload was skipped.
            st.warning(f"Unsupported file format: {uploaded_file.name}")
            continue

        docs.extend(loader.load())
    return docs
37
+
38
+ # -------------------------------
39
+ # 2. Chunking documents
40
+ # -------------------------------
41
def chunk_documents(docs):
    """Split documents into 500-character chunks with a 50-character overlap."""
    return RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    ).split_documents(docs)
44
+
45
+ # -------------------------------
46
+ # 3. Create embeddings
47
+ # -------------------------------
48
def create_embeddings():
    """Build the sentence-transformer embedding model used for indexing."""
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_id)
50
+
51
+ # -------------------------------
52
+ # 4. Create and save FAISS index
53
+ # -------------------------------
54
def create_faiss_index(chunks, embedding):
    """Index the chunks in an in-memory FAISS vector store and return it."""
    return FAISS.from_documents(chunks, embedding)
57
+
58
+ # -------------------------------
59
+ # 5. Query vectorstore and generate answers
60
+ # -------------------------------
61
def query_faiss(vectorstore, question, embedding, grade, subject):
    """Retrieve the 3 most similar chunks and answer the question from them.

    NOTE(review): `embedding`, `grade` and `subject` are accepted for
    interface compatibility but are not used in retrieval or generation.
    """
    hits = vectorstore.similarity_search(question, k=3)
    context = "\n".join(doc.page_content for doc in hits)
    return generate_answers(context, question)
65
+
66
+ # -------------------------------
67
+ # 6. Generate answer using GROQ + LLaMA 3
68
+ # -------------------------------
69
def generate_answers(context, question):
    """Ask GROQ-hosted LLaMA 3 to answer from *context* in two teaching styles.

    Args:
        context: Retrieved document text grounding the answer.
        question: The learner's question (English or Urdu).

    Returns:
        str: The model's answer text.

    Raises:
        RuntimeError: If the GROQ_API_KEY environment variable is unset or
            the API returns an unexpected payload.
        requests.HTTPError: If the API responds with an error status.
    """
    prompt = f"""
You are a helpful teacher. Use the context below to answer the question in two formats:

1. Beginner explanation (Urdu + English mix)
2. Storytelling style (Urdu-English mix with a fun tone)

Context:
{context}

Question:
{question}

Answer:
"""
    # Read the key from the environment instead of hard-coding a secret
    # placeholder in source (the original sent the literal "YOUR_GROQ_API_KEY").
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError("GROQ_API_KEY environment variable is not set")

    headers = {"Authorization": f"Bearer {api_key}"}
    payload = {
        "model": "llama3-8b-8192",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
    }
    response = requests.post(
        "https://api.groq.com/openai/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=60,  # don't hang the Streamlit worker on a stalled request
    )
    # Surface HTTP errors explicitly instead of a confusing KeyError below.
    response.raise_for_status()
    result = response.json()
    try:
        return result["choices"][0]["message"]["content"]
    except (KeyError, IndexError) as err:
        raise RuntimeError(f"Unexpected GROQ API response: {result}") from err
93
+
94
+ # -------------------------------
95
+ # 7. Convert storytelling to voice
96
+ # -------------------------------
97
def text_to_speech(text):
    """Synthesize *text* to an MP3 with gTTS (Urdu voice) and return its path.

    Uses a unique temporary file instead of the fixed name "story.mp3" so
    concurrent Streamlit sessions do not overwrite each other's audio.

    Args:
        text: The story text to speak.

    Returns:
        str: Filesystem path of the generated MP3.
    """
    tts = gTTS(text=text, lang='ur')
    # delete=False: the caller reads the file after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        file_path = tmp.name
    tts.save(file_path)
    return file_path
102
+
103
+ # -------------------------------
104
+ # Streamlit UI
105
+ # -------------------------------
106
def main():
    """Streamlit entry point: collect inputs, build the index, show the answer.

    Flow: upload files -> load & chunk -> embed & index -> retrieve + generate,
    then render the two-part answer and play the storytelling part as audio.
    """
    st.set_page_config(page_title="Learning with Fun")
    st.title("📚 Learning with Fun - Ask Questions & Hear Stories!")

    grade = st.selectbox("Select Grade:", ["Grade 5", "Grade 6"])
    subject = st.selectbox("Select Subject:", ["Science", "Math", "English"])

    uploaded_files = st.file_uploader("Upload Book Files (PDF, DOCX, Images)", accept_multiple_files=True)

    question = st.text_input("Ask a Question (English or Urdu):")

    # Only run the pipeline when we have both files and a question.
    if st.button("Generate Answer") and uploaded_files and question:
        with st.spinner("Processing documents and generating answer..."):
            docs = load_documents(uploaded_files)
            chunks = chunk_documents(docs)
            embedding = create_embeddings()
            vectorstore = create_faiss_index(chunks, embedding)
            answer = query_faiss(vectorstore, question, embedding, grade, subject)

        st.markdown("### 📘 Answer:")
        # The prompt asks for two numbered sections; split on the second heading.
        parts = answer.split("2. Storytelling style")
        st.markdown(f"**1. Explanation:**\n\n{parts[0]}")
        if len(parts) > 1:
            story_text = parts[1].strip()
            st.markdown(f"**2. Storytelling Style:**\n\n{story_text}")

            audio_file = text_to_speech(story_text)
            # Close the file handle deterministically (the original leaked it
            # via open(...).read()).
            with open(audio_file, 'rb') as audio:
                audio_bytes = audio.read()
            st.audio(audio_bytes, format='audio/mp3')
135
+
136
# Run the app when executed directly (e.g. via `streamlit run app.py`).
if __name__ == "__main__":
    main()