MiakOnline committed on
Commit
b0dbdf5
·
verified ·
1 Parent(s): 8cb9c20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -52
app.py CHANGED
@@ -1,67 +1,101 @@
1
- import os
2
  import streamlit as st
3
- from langchain.vectorstores import FAISS
4
  from langchain.embeddings import HuggingFaceEmbeddings
 
 
 
5
  from langchain.chains import RetrievalQA
6
- from langchain_groq import ChatGroq
7
  from gtts import gTTS
 
 
 
 
8
  import tempfile
9
 
10
- # Set your Streamlit page config
11
- st.set_page_config(page_title="Learning with Fun", page_icon="📚")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- # Title and description
14
- st.title("📚 Learning with Fun")
15
- st.markdown("**بچوں کے لیے سوال و جواب اردو اور انگلش میں**")
 
 
 
 
16
 
17
- # Load GROQ API key from environment
18
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
19
- if not GROQ_API_KEY:
20
- st.error("🚨 GROQ_API_KEY is missing! Please set it in Hugging Face secrets.")
21
- st.stop()
22
 
23
- # Load FAISS index
24
- try:
25
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
26
- vectorstore = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
27
- except Exception as e:
28
- st.error(f"❌ Failed to load FAISS index: {e}")
29
- st.stop()
30
 
31
- # Initialize LLaMA3 model via GROQ
32
- llm = ChatGroq(
33
- groq_api_key=GROQ_API_KEY,
34
- model_name="llama3-8b-8192"
35
- )
 
 
 
 
36
 
37
- # Create retrieval chain
38
- qa_chain = RetrievalQA.from_chain_type(
39
- llm=llm,
40
- retriever=vectorstore.as_retriever()
41
- )
 
42
 
43
- # Input box
44
- query = st.text_input("🧠 اپنا سوال درج کریں (اردو یا انگریزی میں):")
 
 
 
 
 
 
 
 
 
 
45
 
46
- if query:
47
- with st.spinner("جواب تیار ہو رہا ہے..."):
48
- try:
49
- # Get simple answer
50
- answer = qa_chain.run(query)
51
-
52
- # Display basic explanation
53
- st.markdown("### 📖 آسان جواب:")
54
- st.write(answer)
 
 
55
 
56
- # Storytelling version
57
- story = f"ایک دن ایک بچے نے پوچھا: '{query}'۔ استاد نے مسکرا کر کہا: {answer}"
58
- st.markdown("### 🧚 کہانی کی صورت میں:")
59
- st.write(story)
60
 
61
- # Convert story to Urdu voice
62
- tts = gTTS(text=story, lang='ur')
63
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmpfile:
64
- tts.save(tmpfile.name)
65
- st.audio(tmpfile.name, format="audio/mp3")
66
- except Exception as e:
67
- st.error(f"❌ Error generating answer: {e}")
 
 
1
# --- Imports -----------------------------------------------------------------
# Standard library
import os
import pickle
import shutil
import tempfile

# Third-party
import streamlit as st
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader, Docx2txtLoader, UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
# BUGFIX: ChatGroq is provided by the `langchain_groq` package, not by
# `langchain_community.llms` (which has no ChatGroq and would raise
# ImportError). The previous revision of this file imported it correctly.
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from gtts import gTTS
from langdetect import detect
14
 
15
# ---- Page chrome ------------------------------------------------------------
st.set_page_config(page_title="Learning with Fun 👦📚", layout="centered")
st.title("🎓 Learning with Fun – Grade 5 & 6 📘")
st.markdown("Ask your questions in Urdu or English. Get simple and storytelling-style answers!")

# Scratch directory for the uploaded file and derived artefacts (index cache,
# generated audio). NOTE(review): created fresh on every Streamlit rerun.
temp_dir = tempfile.mkdtemp()

# The Q&A UI below only renders once a textbook has been uploaded.
uploaded_file = st.file_uploader("📄 Upload a textbook file (PDF, DOCX, or TXT)", type=["pdf", "docx", "txt"])
25
+
26
# Load documents
def load_document(file_path):
    """Load *file_path* into LangChain documents, picking a loader by extension.

    ``.pdf`` → PyMuPDFLoader, ``.docx`` → Docx2txtLoader, anything else falls
    through to the generic UnstructuredFileLoader.
    """
    if file_path.endswith(".pdf"):
        loader_cls = PyMuPDFLoader
    elif file_path.endswith(".docx"):
        loader_cls = Docx2txtLoader
    else:
        loader_cls = UnstructuredFileLoader
    return loader_cls(file_path).load()
35
 
36
# Vector DB functions
def create_vectorstore(docs):
    """Split *docs* into overlapping chunks, embed them, and return a FAISS index."""
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(docs)
    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_documents(chunks, embedder)
43
 
44
# Load or create vectorstore
def get_vectorstore(file):
    """Persist the uploaded *file* into temp_dir and return a FAISS store for it.

    The built store is cached as a pickle next to the uploaded file so repeated
    questions do not re-embed the document.
    NOTE(review): temp_dir is recreated on every Streamlit rerun, so this cache
    only pays off within a single run — consider ``st.cache_resource``.
    """
    file_path = os.path.join(temp_dir, file.name)
    with open(file_path, "wb") as f:
        f.write(file.getbuffer())

    pickle_path = os.path.join(temp_dir, f"{file.name}.pkl")

    if os.path.exists(pickle_path):
        # Robustness: a corrupt or version-incompatible cache must not crash
        # the app — discard it and fall through to a rebuild instead.
        try:
            with open(pickle_path, "rb") as f:
                return pickle.load(f)
        except Exception:
            os.remove(pickle_path)

    docs = load_document(file_path)
    db = create_vectorstore(docs)
    # Best-effort cache write: FAISS stores are not guaranteed picklable
    # (FAISS.save_local/load_local is the supported persistence path), so a
    # failure here is swallowed rather than surfaced to the user.
    try:
        with open(pickle_path, "wb") as f:
            pickle.dump(db, f)
    except Exception:
        pass
    return db
61
 
62
# Generate text to speech
def generate_tts(text, lang):
    """Synthesise *text* with gTTS in language *lang*; return the saved mp3 path."""
    output_path = os.path.join(temp_dir, "output.mp3")
    speech = gTTS(text, lang=lang)
    speech.save(output_path)
    return output_path
68
 
69
# Run QA chain
def run_chain(db, query, response_type):
    """Answer *query* over the FAISS store *db* via a Groq LLaMA-3 RetrievalQA chain.

    response_type: "Explain Simply" → plain-Urdu explanation prompt;
    anything else → storytelling prompt. Returns the chain's answer string.
    """
    # The previous revision validated the key explicitly; without this check a
    # missing key surfaces as a raw traceback instead of a clear message.
    groq_api_key = os.getenv("GROQ_API_KEY")
    if not groq_api_key:
        st.error("🚨 GROQ_API_KEY is missing! Please set it in Hugging Face secrets.")
        st.stop()

    llm = ChatGroq(groq_api_key=groq_api_key, temperature=0.3, model_name="llama3-8b-8192")
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=db.as_retriever())

    if response_type == "Explain Simply":
        prompt = f"Explain this to a Grade 5 kid in simple Urdu:\n{query}"
    else:
        prompt = f"Tell a short fun story in Urdu to explain:\n{query}"

    return qa_chain.run(prompt)
81
 
82
# UI Controls
if uploaded_file:
    query = st.text_input("❓ Ask a question:")
    response_type = st.radio("Select answer format:", ["Explain Simply", "Storytelling"])

    if query:
        with st.spinner("🤖 Thinking..."):
            # The previous revision wrapped this pipeline in try/except; restore
            # that so indexing/LLM/TTS failures show an error instead of a traceback.
            try:
                db = get_vectorstore(uploaded_file)
                answer = run_chain(db, query, response_type)

                st.markdown("### 🧠 Answer:")
                st.success(answer)

                # Detect language and speak. langdetect raises on empty or
                # featureless text, so default to English in that case.
                try:
                    lang = "ur" if detect(answer) == "ur" else "en"
                except Exception:
                    lang = "en"
                audio_path = generate_tts(answer, lang)
                st.audio(audio_path, format="audio/mp3")
            except Exception as e:
                st.error(f"❌ Error generating answer: {e}")
98
 
99
# Footer rule + credit line.
st.markdown("---")
st.caption("Made with 💙 using Streamlit + LLaMA 3 + Hugging Face")