MiakOnline committed on
Commit
8c8ad75
·
verified ·
1 Parent(s): ceab0a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -14
app.py CHANGED
@@ -3,30 +3,34 @@
3
  import os
4
  import tempfile
5
  import streamlit as st
6
-
7
- # βœ… Make sure you have installed langchain-community
8
- # pip install -U langchain langchain-community
9
  from langchain_community.vectorstores import FAISS
10
  from langchain_community.document_loaders import PyMuPDFLoader, Docx2txtLoader, UnstructuredImageLoader
11
  from langchain_community.embeddings import HuggingFaceEmbeddings
12
-
13
  from langchain.text_splitter import RecursiveCharacterTextSplitter
14
  from langchain_core.documents import Document
15
  from gtts import gTTS
16
  import base64
17
  import shutil
18
 
 
19
  st.set_page_config(page_title="Learning with Fun", layout="wide")
20
  st.title("πŸ“š Learning with Fun - Educational Q&A for Kids")
21
 
 
22
  grade = st.selectbox("Select your Grade", ["Grade 5", "Grade 6"])
23
  subject = st.selectbox("Select Subject", ["Science", "Math", "English"])
 
24
  uploaded_files = st.file_uploader("Upload textbook files (PDF, DOCX, JPEG)", type=["pdf", "docx", "jpg", "jpeg"], accept_multiple_files=True)
25
  question = st.text_input("Ask your question in English or Urdu")
 
26
 
 
27
  temp_dir = tempfile.mkdtemp()
28
 
 
29
  def load_documents(uploaded_files):
 
30
  docs = []
31
  for file in uploaded_files:
32
  ext = file.name.split(".")[-1].lower()
@@ -46,19 +50,27 @@ def load_documents(uploaded_files):
46
  return docs
47
 
48
  def split_documents(documents):
 
49
  splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
50
  return splitter.split_documents(documents)
51
 
52
  def create_vector_store(chunks):
 
53
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
54
  return FAISS.from_documents(chunks, embeddings)
55
 
56
  def retrieve_docs(query, vector_store):
 
57
  return vector_store.similarity_search(query, k=3)
58
 
59
- def query_llm_groq(context, query):
60
- from openai import OpenAI
61
- client = OpenAI(api_key=os.getenv("GROQ_API_KEY"), base_url="https://api.groq.com/openai/v1")
 
 
 
 
 
62
 
63
  prompt = f"""
64
  Context:
@@ -71,13 +83,22 @@ def query_llm_groq(context, query):
71
  1. A simple, educational explanation in English + Urdu.
72
  2. A creative storytelling version mixing English and Urdu.
73
  """
74
- response = client.chat.completions.create(
75
- model="llama3-8b-8192",
76
- messages=[{"role": "user", "content": prompt}]
77
- )
78
- return response.choices[0].message.content
 
 
 
 
 
 
 
 
79
 
80
  def generate_audio(text, lang='ur'):
 
81
  tts = gTTS(text, lang=lang)
82
  audio_path = os.path.join(temp_dir, "response.mp3")
83
  tts.save(audio_path)
@@ -87,7 +108,8 @@ def generate_audio(text, lang='ur'):
87
  audio_html = f'<audio autoplay controls><source src="data:audio/mp3;base64,{b64}" type="audio/mp3"></audio>'
88
  return audio_html
89
 
90
- if question and uploaded_files:
 
91
  with st.spinner("Processing your documents..."):
92
  documents = load_documents(uploaded_files)
93
  chunks = split_documents(documents)
@@ -95,7 +117,7 @@ if question and uploaded_files:
95
 
96
  results = retrieve_docs(question, vector_db)
97
  context_text = "\n".join([doc.page_content for doc in results])
98
- answer = query_llm_groq(context_text, question)
99
 
100
  st.markdown("### πŸ“˜ Answer")
101
  parts = answer.split("2.")
@@ -106,5 +128,6 @@ if question and uploaded_files:
106
  else:
107
  st.markdown(answer)
108
 
 
109
  if os.path.exists(temp_dir):
110
  shutil.rmtree(temp_dir)
 
3
  import os
4
  import tempfile
5
  import streamlit as st
6
+ import requests
 
 
7
  from langchain_community.vectorstores import FAISS
8
  from langchain_community.document_loaders import PyMuPDFLoader, Docx2txtLoader, UnstructuredImageLoader
9
  from langchain_community.embeddings import HuggingFaceEmbeddings
 
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
  from langchain_core.documents import Document
12
  from gtts import gTTS
13
  import base64
14
  import shutil
15
 
16
+ # ----------------------------- UI SETUP --------------------------------------
17
  st.set_page_config(page_title="Learning with Fun", layout="wide")
18
  st.title("πŸ“š Learning with Fun - Educational Q&A for Kids")
19
 
20
+ # ----------------------------- USER INPUT -----------------------------------
21
  grade = st.selectbox("Select your Grade", ["Grade 5", "Grade 6"])
22
  subject = st.selectbox("Select Subject", ["Science", "Math", "English"])
23
+
24
  uploaded_files = st.file_uploader("Upload textbook files (PDF, DOCX, JPEG)", type=["pdf", "docx", "jpg", "jpeg"], accept_multiple_files=True)
25
  question = st.text_input("Ask your question in English or Urdu")
26
+ groq_api_key = st.text_input("πŸ” Enter your GROQ API Key", type="password")
27
 
28
+ # ------------------------- SETUP TEMP FOLDER -------------------------------
29
  temp_dir = tempfile.mkdtemp()
30
 
31
+ # ------------------------- UTILITY FUNCTIONS -------------------------------
32
  def load_documents(uploaded_files):
33
+ """Load various file types into LangChain Document format."""
34
  docs = []
35
  for file in uploaded_files:
36
  ext = file.name.split(".")[-1].lower()
 
50
  return docs
51
 
52
  def split_documents(documents):
53
+ """Split documents into smaller chunks."""
54
  splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
55
  return splitter.split_documents(documents)
56
 
57
  def create_vector_store(chunks):
58
+ """Create FAISS vector DB from text chunks."""
59
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
60
  return FAISS.from_documents(chunks, embeddings)
61
 
62
  def retrieve_docs(query, vector_store):
63
+ """Search FAISS for relevant chunks."""
64
  return vector_store.similarity_search(query, k=3)
65
 
66
+ def query_llm_groq(context, query, groq_api_key):
67
+ """Query GROQ LLaMA 3 API directly and return formatted answers."""
68
+ url = "https://api.groq.com/openai/v1/chat/completions"
69
+
70
+ headers = {
71
+ "Authorization": f"Bearer {groq_api_key}",
72
+ "Content-Type": "application/json"
73
+ }
74
 
75
  prompt = f"""
76
  Context:
 
83
  1. A simple, educational explanation in English + Urdu.
84
  2. A creative storytelling version mixing English and Urdu.
85
  """
86
+
87
+ data = {
88
+ "model": "llama3-8b-8192",
89
+ "messages": [
90
+ {"role": "user", "content": prompt}
91
+ ],
92
+ "temperature": 0.7
93
+ }
94
+
95
+ response = requests.post(url, headers=headers, json=data)
96
+ response.raise_for_status()
97
+ result = response.json()
98
+ return result["choices"][0]["message"]["content"]
99
 
100
  def generate_audio(text, lang='ur'):
101
+ """Convert text to audio using gTTS and return playable audio HTML."""
102
  tts = gTTS(text, lang=lang)
103
  audio_path = os.path.join(temp_dir, "response.mp3")
104
  tts.save(audio_path)
 
108
  audio_html = f'<audio autoplay controls><source src="data:audio/mp3;base64,{b64}" type="audio/mp3"></audio>'
109
  return audio_html
110
 
111
+ # ----------------------------- MAIN LOGIC ----------------------------------
112
+ if question and uploaded_files and groq_api_key:
113
  with st.spinner("Processing your documents..."):
114
  documents = load_documents(uploaded_files)
115
  chunks = split_documents(documents)
 
117
 
118
  results = retrieve_docs(question, vector_db)
119
  context_text = "\n".join([doc.page_content for doc in results])
120
+ answer = query_llm_groq(context_text, question, groq_api_key)
121
 
122
  st.markdown("### πŸ“˜ Answer")
123
  parts = answer.split("2.")
 
128
  else:
129
  st.markdown(answer)
130
 
131
+ # ----------------------------- CLEANUP --------------------------------------
132
  if os.path.exists(temp_dir):
133
  shutil.rmtree(temp_dir)