Update app.py
Browse files
app.py
CHANGED
|
@@ -33,11 +33,26 @@ st.markdown("""
|
|
| 33 |
padding: 10px;
|
| 34 |
border-radius: 10px;
|
| 35 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
</style>
|
| 37 |
""", unsafe_allow_html=True)
|
| 38 |
|
| 39 |
-
st.
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
# ----------------------------- USER INPUT -----------------------------------
|
| 43 |
grade = st.selectbox("Select your Grade", ["Grade 5", "Grade 6"])
|
|
@@ -46,6 +61,12 @@ subject = st.selectbox("Select Subject", ["Science", "Math", "English"])
|
|
| 46 |
uploaded_files = st.file_uploader("Upload textbook files (PDF, DOCX, JPEG)", type=["pdf", "docx", "jpg", "jpeg"], accept_multiple_files=True)
|
| 47 |
question = st.text_input("Ask your question in English or Urdu")
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
# ----------------------------- ENV VAR SETUP -----------------------------------
|
| 50 |
groq_api_key = os.getenv("GROQ_API_KEY", "")
|
| 51 |
if not groq_api_key:
|
|
@@ -56,7 +77,6 @@ temp_dir = tempfile.mkdtemp()
|
|
| 56 |
|
| 57 |
# ------------------------- UTILITY FUNCTIONS -------------------------------
|
| 58 |
def load_documents(uploaded_files):
|
| 59 |
-
"""Load various file types into LangChain Document format."""
|
| 60 |
docs = []
|
| 61 |
for file in uploaded_files:
|
| 62 |
ext = file.name.split(".")[-1].lower()
|
|
@@ -76,28 +96,22 @@ def load_documents(uploaded_files):
|
|
| 76 |
return docs
|
| 77 |
|
| 78 |
def split_documents(documents):
|
| 79 |
-
"""Split documents into smaller chunks."""
|
| 80 |
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
|
| 81 |
return splitter.split_documents(documents)
|
| 82 |
|
| 83 |
def create_vector_store(chunks):
|
| 84 |
-
"""Create FAISS vector DB from text chunks."""
|
| 85 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
| 86 |
return FAISS.from_documents(chunks, embeddings)
|
| 87 |
|
| 88 |
def retrieve_docs(query, vector_store):
|
| 89 |
-
"""Search FAISS for relevant chunks."""
|
| 90 |
return vector_store.similarity_search(query, k=3)
|
| 91 |
|
| 92 |
def query_llm_groq(context, query, groq_api_key):
|
| 93 |
-
"""Query GROQ LLaMA 3 API directly and return formatted answers."""
|
| 94 |
url = "https://api.groq.com/openai/v1/chat/completions"
|
| 95 |
-
|
| 96 |
headers = {
|
| 97 |
"Authorization": f"Bearer {groq_api_key}",
|
| 98 |
"Content-Type": "application/json"
|
| 99 |
}
|
| 100 |
-
|
| 101 |
prompt = f"""
|
| 102 |
Context:
|
| 103 |
{context}
|
|
@@ -109,7 +123,6 @@ def query_llm_groq(context, query, groq_api_key):
|
|
| 109 |
1. A simple, educational explanation in English + Urdu.
|
| 110 |
2. A creative storytelling version mixing English and Urdu.
|
| 111 |
"""
|
| 112 |
-
|
| 113 |
data = {
|
| 114 |
"model": "llama3-8b-8192",
|
| 115 |
"messages": [
|
|
@@ -117,14 +130,12 @@ def query_llm_groq(context, query, groq_api_key):
|
|
| 117 |
],
|
| 118 |
"temperature": 0.7
|
| 119 |
}
|
| 120 |
-
|
| 121 |
response = requests.post(url, headers=headers, json=data)
|
| 122 |
response.raise_for_status()
|
| 123 |
result = response.json()
|
| 124 |
return result["choices"][0]["message"]["content"]
|
| 125 |
|
| 126 |
def generate_audio(text, lang='ur'):
|
| 127 |
-
"""Convert text to audio using gTTS and return playable audio HTML."""
|
| 128 |
tts = gTTS(text, lang=lang)
|
| 129 |
audio_path = os.path.join(temp_dir, "response.mp3")
|
| 130 |
tts.save(audio_path)
|
|
@@ -135,7 +146,7 @@ def generate_audio(text, lang='ur'):
|
|
| 135 |
return audio_html
|
| 136 |
|
| 137 |
# ----------------------------- MAIN LOGIC ----------------------------------
|
| 138 |
-
if question and uploaded_files and groq_api_key:
|
| 139 |
with st.spinner("Processing your documents and generating answer..."):
|
| 140 |
documents = load_documents(uploaded_files)
|
| 141 |
chunks = split_documents(documents)
|
|
|
|
| 33 |
padding: 10px;
|
| 34 |
border-radius: 10px;
|
| 35 |
}
|
| 36 |
+
.title-container {
|
| 37 |
+
display: flex;
|
| 38 |
+
align-items: center;
|
| 39 |
+
gap: 20px;
|
| 40 |
+
}
|
| 41 |
+
.title-container img {
|
| 42 |
+
height: 80px;
|
| 43 |
+
}
|
| 44 |
</style>
|
| 45 |
""", unsafe_allow_html=True)
|
| 46 |
|
| 47 |
+
st.markdown("""
|
| 48 |
+
<div class="title-container">
|
| 49 |
+
<img src="https://cdn-icons-png.flaticon.com/512/201/201623.png" alt="Kids Book">
|
| 50 |
+
<div>
|
| 51 |
+
<h1>📚 Learning with Fun 📚</h1>
|
| 52 |
+
<h4>Helping Kids Learn Through Interactive Books, Questions & Stories!</h4>
|
| 53 |
+
</div>
|
| 54 |
+
</div>
|
| 55 |
+
""", unsafe_allow_html=True)
|
| 56 |
|
| 57 |
# ----------------------------- USER INPUT -----------------------------------
|
| 58 |
grade = st.selectbox("Select your Grade", ["Grade 5", "Grade 6"])
|
|
|
|
| 61 |
uploaded_files = st.file_uploader("Upload textbook files (PDF, DOCX, JPEG)", type=["pdf", "docx", "jpg", "jpeg"], accept_multiple_files=True)
|
| 62 |
question = st.text_input("Ask your question in English or Urdu")
|
| 63 |
|
| 64 |
+
submit_btn = st.button("💬 Submit Question")
|
| 65 |
+
clear_btn = st.button("🧹 Clear")
|
| 66 |
+
|
| 67 |
+
if clear_btn:
|
| 68 |
+
st.experimental_rerun()
|
| 69 |
+
|
| 70 |
# ----------------------------- ENV VAR SETUP -----------------------------------
|
| 71 |
groq_api_key = os.getenv("GROQ_API_KEY", "")
|
| 72 |
if not groq_api_key:
|
|
|
|
| 77 |
|
| 78 |
# ------------------------- UTILITY FUNCTIONS -------------------------------
|
| 79 |
def load_documents(uploaded_files):
|
|
|
|
| 80 |
docs = []
|
| 81 |
for file in uploaded_files:
|
| 82 |
ext = file.name.split(".")[-1].lower()
|
|
|
|
| 96 |
return docs
|
| 97 |
|
| 98 |
def split_documents(documents):
|
|
|
|
| 99 |
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
|
| 100 |
return splitter.split_documents(documents)
|
| 101 |
|
| 102 |
def create_vector_store(chunks):
|
|
|
|
| 103 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
| 104 |
return FAISS.from_documents(chunks, embeddings)
|
| 105 |
|
| 106 |
def retrieve_docs(query, vector_store):
|
|
|
|
| 107 |
return vector_store.similarity_search(query, k=3)
|
| 108 |
|
| 109 |
def query_llm_groq(context, query, groq_api_key):
|
|
|
|
| 110 |
url = "https://api.groq.com/openai/v1/chat/completions"
|
|
|
|
| 111 |
headers = {
|
| 112 |
"Authorization": f"Bearer {groq_api_key}",
|
| 113 |
"Content-Type": "application/json"
|
| 114 |
}
|
|
|
|
| 115 |
prompt = f"""
|
| 116 |
Context:
|
| 117 |
{context}
|
|
|
|
| 123 |
1. A simple, educational explanation in English + Urdu.
|
| 124 |
2. A creative storytelling version mixing English and Urdu.
|
| 125 |
"""
|
|
|
|
| 126 |
data = {
|
| 127 |
"model": "llama3-8b-8192",
|
| 128 |
"messages": [
|
|
|
|
| 130 |
],
|
| 131 |
"temperature": 0.7
|
| 132 |
}
|
|
|
|
| 133 |
response = requests.post(url, headers=headers, json=data)
|
| 134 |
response.raise_for_status()
|
| 135 |
result = response.json()
|
| 136 |
return result["choices"][0]["message"]["content"]
|
| 137 |
|
| 138 |
def generate_audio(text, lang='ur'):
|
|
|
|
| 139 |
tts = gTTS(text, lang=lang)
|
| 140 |
audio_path = os.path.join(temp_dir, "response.mp3")
|
| 141 |
tts.save(audio_path)
|
|
|
|
| 146 |
return audio_html
|
| 147 |
|
| 148 |
# ----------------------------- MAIN LOGIC ----------------------------------
|
| 149 |
+
if submit_btn and question and uploaded_files and groq_api_key:
|
| 150 |
with st.spinner("Processing your documents and generating answer..."):
|
| 151 |
documents = load_documents(uploaded_files)
|
| 152 |
chunks = split_documents(documents)
|