|
|
|
|
|
|
|
|
import os |
|
|
import tempfile |
|
|
import streamlit as st |
|
|
import requests |
|
|
from langchain_community.vectorstores import FAISS |
|
|
from langchain_community.document_loaders import PyMuPDFLoader, Docx2txtLoader, UnstructuredImageLoader |
|
|
from langchain_community.embeddings import HuggingFaceEmbeddings |
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
|
from langchain_core.documents import Document |
|
|
from gtts import gTTS |
|
|
import base64 |
|
|
import shutil |
|
|
|
|
|
|
|
|
# Browser-tab title and wide layout for the whole app.
st.set_page_config(page_title="Learning with Fun", layout="wide")

# Custom CSS: page background, top padding, widget label/input styling and
# the flex layout used by the title banner.
_page_css = """
<style>
.main {
background-color: #f0f8ff;
}
.block-container {
padding-top: 2rem;
}
.stSelectbox > label, .stTextInput > label {
font-size: 18px;
font-weight: bold;
color: #2e7d32;
}
.stTextInput input {
font-size: 16px;
padding: 10px;
border-radius: 10px;
}
.title-container {
display: flex;
align-items: center;
gap: 20px;
}
.title-container img {
height: 80px;
}
</style>
"""
# unsafe_allow_html is needed so the <style> tag is emitted as raw HTML.
st.markdown(_page_css, unsafe_allow_html=True)
|
|
|
|
|
# Title banner: an icon next to the app name and tagline, laid out by the
# .title-container CSS class.
# NOTE(review): the "π" glyphs around the heading look like mis-decoded
# emoji -- confirm the file's encoding before changing them.
_title_banner_html = """
<div class="title-container">
<img src="https://cdn-icons-png.flaticon.com/512/201/201623.png" alt="Kids Book">
<div>
<h1>π Learning with Fun π</h1>
<h4>Helping Kids Learn Through Interactive Books, Questions & Stories!</h4>
</div>
</div>
"""
st.markdown(_title_banner_html, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
# Grade and subject pickers.
# NOTE(review): neither value is referenced again in the visible code (it is
# not added to the LLM prompt) -- confirm whether that is intended.
grade = st.selectbox("Select your Grade", ["Grade 5", "Grade 6"])
subject = st.selectbox("Select Subject", ["Science", "Math", "English"])

# Textbook upload; several files may be supplied at once.
_accepted_types = ["pdf", "docx", "jpg", "jpeg"]
uploaded_files = st.file_uploader(
    "Upload textbook files (PDF, DOCX, JPEG)",
    type=_accepted_types,
    accept_multiple_files=True,
)

# Pre-fill the question box with the last submitted question, if there is one.
if 'last_question' in st.session_state:
    _initial_question = st.session_state.last_question
else:
    _initial_question = ""
question = st.text_input("Ask your question in English or Urdu", value=_initial_question)

# Action buttons; each is True only during the run triggered by its click.
submit_btn = st.button("π¬ Submit Question")
clear_btn = st.button("π§Ή Clear")
|
|
|
|
|
|
|
|
# The Groq API key comes from the environment; an unset variable yields "".
groq_api_key = os.environ.get("GROQ_API_KEY", "")
if not groq_api_key:
    # Without a key the submit handler further down is skipped, so tell the user why.
    st.warning("GROQ API key is not set in the environment. Please configure it as a Hugging Face Secret with the name 'GROQ_API_KEY'.")

# Scratch directory for this script run: load_documents() and generate_audio()
# write into it, and it is removed again at the end of the script.
temp_dir = tempfile.mkdtemp()
|
|
|
|
|
|
|
|
def load_documents(uploaded_files):
    """Save each supported upload into ``temp_dir`` and parse it into documents.

    Args:
        uploaded_files: Iterable of seekable binary file-like objects that
            expose a ``name`` attribute (the values ``st.file_uploader``
            returns are passed in by the caller).

    Returns:
        A flat list holding everything ``loader.load()`` returned for each
        supported file, in upload order. Files whose extension is not pdf,
        docx, jpg or jpeg are skipped without being written to disk.
    """
    supported_extensions = ("pdf", "docx", "jpg", "jpeg")
    docs = []
    for file in uploaded_files:
        # The name is supplied by the client: keep only its last path
        # component so a value such as "../../x.pdf" cannot escape temp_dir.
        safe_name = os.path.basename(file.name)
        ext = safe_name.split(".")[-1].lower()
        if ext not in supported_extensions:
            # Decide before touching the disk so unsupported files are never stored.
            continue

        path = os.path.join(temp_dir, safe_name)
        # Rewind first: if the object was already read once, read() would
        # otherwise return b"" and an empty file would be handed to the loader.
        file.seek(0)
        with open(path, "wb") as f:
            f.write(file.read())

        if ext == "pdf":
            loader = PyMuPDFLoader(path)
        elif ext == "docx":
            loader = Docx2txtLoader(path)
        else:
            # Only "jpg" / "jpeg" can reach this branch.
            loader = UnstructuredImageLoader(path)
        docs.extend(loader.load())
    return docs
|
|
|
|
|
def split_documents(documents):
    """Split ``documents`` into chunks for embedding.

    Uses a RecursiveCharacterTextSplitter configured with chunk_size=500 and
    chunk_overlap=50 and returns whatever its ``split_documents`` produces.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    ).split_documents(documents)
|
|
|
|
|
def create_vector_store(chunks):
    """Build a FAISS vector store from ``chunks``.

    The chunks are embedded with the HuggingFace model
    "sentence-transformers/all-MiniLM-L6-v2"; a fresh embeddings object is
    constructed on every call.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )
    store = FAISS.from_documents(chunks, embedder)
    return store
|
|
|
|
|
def retrieve_docs(query, vector_store):
    """Return the three entries of ``vector_store`` most similar to ``query``.

    ``vector_store`` only needs a ``similarity_search(query, k=...)`` method;
    its result is returned unchanged.
    """
    top_matches = vector_store.similarity_search(query, k=3)
    return top_matches
|
|
|
|
|
def query_llm_groq(context, query, groq_api_key, timeout=60):
    """Ask the Groq chat-completions endpoint to answer ``query`` from ``context``.

    Args:
        context: Text inserted under the "Context:" heading of the prompt.
        query: The question, inserted under the "Question:" heading.
        groq_api_key: Sent as the bearer token in the Authorization header.
        timeout: Seconds handed to ``requests.post``. Without a timeout a
            stalled connection would block the script run indefinitely.

    Returns:
        The ``content`` string of the first choice in the JSON response.

    Raises:
        requests.HTTPError: The API answered with an error status.
        requests.Timeout: No response arrived within ``timeout`` seconds.
    """
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {groq_api_key}",
        "Content-Type": "application/json",
    }
    # The numbered "1." / "2." outputs requested here are what the display
    # code later relies on when it splits the answer on "2.".
    prompt = (
        "\n"
        "Context:\n"
        f"{context}\n"
        "\n"
        "Question:\n"
        f"{query}\n"
        "\n"
        "Provide two outputs:\n"
        "1. A simple, educational explanation in English + Urdu.\n"
        "2. A creative storytelling version mixing English and Urdu.\n"
    )
    data = {
        "model": "llama3-8b-8192",
        "messages": [
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.7,
    }
    response = requests.post(url, headers=headers, json=data, timeout=timeout)
    response.raise_for_status()
    result = response.json()
    return result["choices"][0]["message"]["content"]
|
|
|
|
|
def generate_audio(text, lang='ur'):
    """Synthesize ``text`` with gTTS and return an embeddable HTML audio player.

    The speech is saved as "response.mp3" inside ``temp_dir``, read back and
    inlined as a base64 ``data:`` URI, so the returned ``<audio>`` tag needs
    no separate file to be served.

    Args:
        text: The text to speak.
        lang: Language code passed to gTTS; defaults to 'ur'.

    Returns:
        An HTML string containing an ``<audio controls>`` element.
    """
    speech = gTTS(text, lang=lang)
    mp3_path = os.path.join(temp_dir, "response.mp3")
    speech.save(mp3_path)

    with open(mp3_path, "rb") as mp3_file:
        encoded = base64.b64encode(mp3_file.read()).decode()

    return f'<audio controls><source src="data:audio/mp3;base64,{encoded}" type="audio/mp3"></audio>'
|
|
|
|
|
|
|
|
# Run the pipeline only when Submit was clicked and a question, at least one
# upload and an API key are all present.
if submit_btn and question and uploaded_files and groq_api_key:
    with st.spinner("Processing your documents and generating answer..."):
        documents = load_documents(uploaded_files)
        chunks = split_documents(documents)

        if not chunks:
            # Nothing to index (e.g. uploads with no extractable text).
            # Building a FAISS store from an empty list fails with an opaque
            # error, so report the problem to the user instead.
            st.error("No readable text was found in the uploaded files.")
        else:
            vector_db = create_vector_store(chunks)

            results = retrieve_docs(question, vector_db)
            context_text = "\n".join(doc.page_content for doc in results)
            try:
                answer = query_llm_groq(context_text, question, groq_api_key)
            except requests.RequestException as exc:
                # Report network/HTTP failures in the page rather than aborting
                # the run, so the rest of the script (including the temp-dir
                # cleanup at the end) still executes.
                st.error(f"Could not get an answer from the Groq API: {exc}")
            else:
                # Keep the result across reruns so it stays visible while the
                # user interacts with other widgets.
                st.session_state.answer = answer
                st.session_state.last_question = question
|
|
|
|
|
# Handle "Clear" BEFORE rendering. If the stored answer were deleted only
# after it had been drawn, it would stay on screen for the whole run in which
# Clear was clicked and vanish only on the next interaction.
if clear_btn:
    if 'answer' in st.session_state:
        del st.session_state['answer']

# Show the stored answer, if any.
if 'answer' in st.session_state:
    st.markdown("### π Answer")
    # The prompt asks for outputs numbered "1." and "2."; the text is treated
    # as explanation + story only when "2." occurs exactly once.
    parts = st.session_state.answer.split("2.")
    if len(parts) == 2:
        st.markdown(f"**Explanation:**\n{parts[0]}")
        st.markdown(f"**Storytelling:**\n{parts[1]}")

        # Audio is generated on demand for the storytelling part only.
        if st.button("π Play Storytelling Voice"):
            st.markdown(generate_audio(parts[1]), unsafe_allow_html=True)
    else:
        # Could not separate the two outputs: show the raw answer.
        st.markdown(st.session_state.answer)
|
|
|
|
|
|
|
|
# End of the script run: remove the scratch directory created above, if it
# is still present.
_temp_dir_present = os.path.exists(temp_dir)
if _temp_dir_present:
    shutil.rmtree(temp_dir)
|
|
|