MiakOnline commited on
Commit
9b60ea9
·
verified ·
1 Parent(s): b0dbdf5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -86
app.py CHANGED
@@ -1,101 +1,135 @@
1
  import streamlit as st
2
- from langchain_community.vectorstores import FAISS
 
 
 
 
 
 
 
 
3
  from langchain.embeddings import HuggingFaceEmbeddings
4
- from langchain_community.document_loaders import PyMuPDFLoader, Docx2txtLoader, UnstructuredFileLoader
5
  from langchain.text_splitter import CharacterTextSplitter
6
- from langchain_community.llms import ChatGroq
 
 
 
7
  from langchain.chains import RetrievalQA
8
- from gtts import gTTS
9
- from langdetect import detect
10
- import os
11
- import shutil
12
- import pickle
13
- import tempfile
14
 
15
- # Title
16
- st.set_page_config(page_title="Learning with Fun 👦📚", layout="centered")
17
- st.title("🎓 Learning with Fun – Grade 5 & 6 📘")
18
- st.markdown("Ask your questions in Urdu or English. Get simple and storytelling-style answers!")
19
 
20
- # Temp directory for file upload
21
- temp_dir = tempfile.mkdtemp()
 
 
22
 
23
- # File upload
24
- uploaded_file = st.file_uploader("📄 Upload a textbook file (PDF, DOCX, or TXT)", type=["pdf", "docx", "txt"])
 
 
 
25
 
26
- # Load documents
27
- def load_document(file_path):
28
- if file_path.endswith(".pdf"):
29
- loader = PyMuPDFLoader(file_path)
30
- elif file_path.endswith(".docx"):
31
- loader = Docx2txtLoader(file_path)
32
  else:
33
- loader = UnstructuredFileLoader(file_path)
34
- return loader.load()
35
-
36
- # Vector DB functions
37
- def create_vectorstore(docs):
38
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
39
- texts = text_splitter.split_documents(docs)
40
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
41
- db = FAISS.from_documents(texts, embeddings)
42
- return db
43
-
44
- # Load or create vectorstore
45
- def get_vectorstore(file):
46
- file_path = os.path.join(temp_dir, file.name)
47
- with open(file_path, "wb") as f:
48
- f.write(file.getbuffer())
49
-
50
- pickle_path = os.path.join(temp_dir, f"{file.name}.pkl")
51
-
52
- if os.path.exists(pickle_path):
53
- with open(pickle_path, "rb") as f:
54
- db = pickle.load(f)
55
  else:
56
- docs = load_document(file_path)
57
- db = create_vectorstore(docs)
58
- with open(pickle_path, "wb") as f:
59
- pickle.dump(db, f)
60
- return db
61
-
62
- # Generate text to speech
63
- def generate_tts(text, lang):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  tts = gTTS(text, lang=lang)
65
- output_path = os.path.join(temp_dir, "output.mp3")
66
- tts.save(output_path)
67
- return output_path
68
-
69
- # Run QA chain
70
- def run_chain(db, query, response_type):
71
- llm = ChatGroq(temperature=0.3, model_name="llama3-8b-8192")
72
- qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=db.as_retriever())
73
-
74
- if response_type == "Explain Simply":
75
- prompt = f"Explain this to a Grade 5 kid in simple Urdu:\n{query}"
 
76
  else:
77
- prompt = f"Tell a short fun story in Urdu to explain:\n{query}"
78
-
79
- result = qa_chain.run(prompt)
80
- return result
81
 
82
- # UI Controls
83
  if uploaded_file:
84
- query = st.text_input("❓ Ask a question:")
85
- response_type = st.radio("Select answer format:", ["Explain Simply", "Storytelling"])
86
-
 
87
  if query:
88
- with st.spinner("🤖 Thinking..."):
89
- db = get_vectorstore(uploaded_file)
90
- answer = run_chain(db, query, response_type)
91
- st.markdown("### 🧠 Answer:")
92
- st.success(answer)
93
-
94
- # Detect language and speak
95
- lang = "ur" if detect(answer) == "ur" else "en"
96
- audio_path = generate_tts(answer, lang)
97
- st.audio(audio_path, format="audio/mp3")
98
-
99
- # Cleanup
100
- st.markdown("---")
101
- st.caption("Made with 💙 using Streamlit + LLaMA 3 + Hugging Face")
 
1
  import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from docx import Document
4
+ from PIL import Image
5
+ import requests
6
+ import os
7
+ import tempfile
8
+ import base64
9
+ from gtts import gTTS
10
+ from langchain.vectorstores import FAISS
11
  from langchain.embeddings import HuggingFaceEmbeddings
 
12
  from langchain.text_splitter import CharacterTextSplitter
13
+ from langchain_core.documents import Document as LCDocument
14
+ from langchain_core.runnables import RunnableLambda, RunnablePassthrough
15
+ from langchain_core.prompts import PromptTemplate
16
+ from langchain_community.llms import Groq
17
  from langchain.chains import RetrievalQA
 
 
 
 
 
 
18
 
19
# GROQ API setup
# Prefer Streamlit secrets, fall back to the environment so the app also
# runs outside Streamlit Cloud. st.secrets raises FileNotFoundError when
# no secrets.toml exists at all, so the lookup is guarded.
try:
    groq_api_key = st.secrets["GROQ_API_KEY"]
except (KeyError, FileNotFoundError):
    groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    # Surface the misconfiguration up front instead of failing on first query.
    st.warning("GROQ_API_KEY is not configured; answers will fail until it is set.")
llm = Groq(temperature=0.3, model_name="llama3-8b-8192", groq_api_key=groq_api_key)
 
22
 
23
# ---- Page chrome and sidebar controls ----
st.set_page_config(page_title="Learning with Fun", layout="wide")
st.title("📘 Learning with Fun - Kids QA App")
st.markdown("Ask questions from your syllabus! 📚")

# Sidebar widgets: grade/subject are informational for the user, `mode`
# selects the prompt template, `voice_enabled` toggles gTTS playback.
_GRADE_OPTIONS = ["Grade 5", "Grade 6"]
_SUBJECT_OPTIONS = ["Science", "Math", "Computer", "Islamiyat"]
_MODE_OPTIONS = ["🧠 Beginner Explanation", "📖 Storytelling"]

sidebar = st.sidebar
grade = sidebar.selectbox("Select Grade", _GRADE_OPTIONS)
subject = sidebar.selectbox("Select Subject", _SUBJECT_OPTIONS)
mode = sidebar.radio("Answer Format", _MODE_OPTIONS)
voice_enabled = sidebar.checkbox("🔈 Enable Voice", value=True)
33
 
34
# Google Drive PDF/DOC support
def fetch_from_gdrive(link):
    """Download a shared Google Drive file to a local temp file.

    Accepts both common link styles (``...?id=<id>`` and ``.../d/<id>/...``).
    Returns the temp-file path on success, or ``None`` when the link is
    unrecognized or the download fails.
    """
    if "id=" in link:
        # Drop any query parameters that follow the id.
        file_id = link.split("id=")[1].split("&")[0]
    elif "/d/" in link:
        file_id = link.split("/d/")[1].split("/")[0]
    else:
        return None

    url = f"https://drive.google.com/uc?export=download&id={file_id}"
    try:
        # A timeout keeps the Streamlit script from hanging forever on a
        # dead link; network errors become a None return, not a crash.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        return None
    if response.status_code == 200:
        tmp_file = tempfile.NamedTemporaryFile(delete=False)
        tmp_file.write(response.content)
        tmp_file.close()
        return tmp_file.name
    return None
51
+
52
# ---- Syllabus source: a shared Google Drive link ----
uploaded_file = None
file_link = st.text_input("Paste Google Drive Link to Syllabus File (.pdf or .docx)")

if file_link:
    downloaded_path = fetch_from_gdrive(file_link)
    if downloaded_path is None:
        st.error("Invalid Google Drive link or download error.")
    else:
        uploaded_file = downloaded_path
61
+
62
# Extract text
def extract_text(file_path):
    """Return the plain text of a .pdf or .docx file.

    Unknown extensions yield an empty string. PyPDF2's
    ``page.extract_text()`` can return ``None`` for pages with no
    extractable text, so that case is guarded to avoid a TypeError.
    """
    parts = []
    if file_path.endswith(".pdf"):
        reader = PdfReader(file_path)
        for page in reader.pages:
            # extract_text() may be None on image-only pages.
            parts.append(page.extract_text() or "")
    elif file_path.endswith(".docx"):
        doc = Document(file_path)
        for para in doc.paragraphs:
            parts.append(para.text + "\n")
    # join instead of repeated += keeps this linear on large documents.
    return "".join(parts)
74
+
75
# Embeddings + Vectorstore
def create_vectorstore(text):
    """Chunk raw syllabus text and index the chunks in a FAISS store."""
    splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.create_documents([text])
    # Default HuggingFaceEmbeddings model; embeds every chunk once.
    return FAISS.from_documents(chunks, HuggingFaceEmbeddings())
82
+
83
# Prompt templates (Urdu). {question} is the student's query, {context}
# is the retrieved syllabus text.
story_prompt = PromptTemplate(
    input_variables=["question", "context"],
    template=(
        "ایک طالب علم نے سوال کیا: {question}\n"
        "نصاب کی معلومات: {context}\n"
        "برائے مہربانی ایک دلچسپ کہانی کی صورت میں بچے کو اردو میں جواب دیں۔"
    ),
)

explain_prompt = PromptTemplate(
    input_variables=["question", "context"],
    template=(
        "سوال: {question}\n"
        "نصاب کا سیاق و سباق: {context}\n"
        "براہ کرم بچے کو اردو زبان میں آسان انداز میں سمجھائیں۔"
    ),
)
95
+
96
# TTS
def generate_voice(text, lang='ur'):
    """Synthesize *text* with gTTS and return the path of the saved .mp3.

    The caller owns the temp file (it is not auto-deleted).
    """
    tts = gTTS(text, lang=lang)
    # mkstemp + close instead of an open NamedTemporaryFile: gTTS writes
    # to the *name*, and keeping the handle open both leaks the fd and
    # fails on Windows, where an open file cannot be reopened for writing.
    fd, mp3_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    tts.save(mp3_path)
    return mp3_path
102
+
103
# QA pipeline
def get_answer(query, vectorstore, mode):
    """Retrieve syllabus context for *query* and ask the LLM in *mode*.

    ``mode`` selects the storytelling or plain-explanation Urdu prompt.
    """
    matches = vectorstore.as_retriever().get_relevant_documents(query)
    context = "\n".join(doc.page_content for doc in matches)

    template = story_prompt if mode == "📖 Storytelling" else explain_prompt
    prompt = template.format(question=query, context=context)
    return llm.invoke(prompt)
116
 
117
# Main logic
if uploaded_file:
    raw_text = extract_text(uploaded_file)
    if not raw_text.strip():
        # e.g. image-only PDF — embedding empty text would be useless.
        st.error("No text could be extracted from the file.")
    else:
        st.success("📄 Syllabus loaded successfully!")

        query = st.text_input("❓ Ask your question (Urdu or English)")
        if query:
            with st.spinner("Thinking..."):
                # Streamlit reruns this script on every interaction, so
                # cache the vectorstore per file instead of re-embedding
                # the whole syllabus for every question.
                if st.session_state.get("vs_path") != uploaded_file:
                    st.session_state["vs"] = create_vectorstore(raw_text)
                    st.session_state["vs_path"] = uploaded_file
                answer = get_answer(query, st.session_state["vs"], mode)
                st.markdown("### Answer:")
                st.write(answer)

                if voice_enabled:
                    audio_file = generate_voice(answer)
                    with open(audio_file, "rb") as audio:
                        st.audio(audio.read(), format="audio/mp3")
135
+