import streamlit as st
from utils import *
from main_IO import *
from download_questions import create_docx_from_data
from backend.raw_text_processing import *
from backend.chromadb_utils import *
import os
import sys
import logging
# Make the project root (the directory one level above this file's folder)
# importable by adding it to sys.path.
# NOTE(review): this runs after the project imports at the top of the file, so
# it cannot influence how those imports are resolved — confirm that is intended
# before moving it, since prepending the root earlier could change which
# modules the names `utils` / `backend` resolve to.
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if root_path not in sys.path:
    # Prepend so the root takes precedence over the existing entries; the
    # membership test keeps repeated script runs from adding duplicates.
    sys.path.insert(0, root_path)
# Configuration: page setup, session-state defaults and styling. The three
# helpers are not defined in this file (they arrive via the star imports above).
# NOTE(review): configure_page() presumably wraps st.set_page_config and so has
# to stay the first Streamlit call — confirm before reordering.
configure_page()
initialise_session_state()
apply_style()
# NOTE(review): the add_sidebar_header() helper call is disabled; the sidebar
# heading is rendered from the inline HTML string below instead. The string
# carries no markup as found here — confirm nothing was lost.
st.sidebar.html("""
Menu
""")
# ChromaDB settings: the embedding model name and the model path that are
# passed together to initialize_chromadb() further down in this script.
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
model_path = "./chromadb_model"

# Logger set-up. The switch is forced to False on every run of this script, so
# the level selector below only appears if this assignment is edited to True.
st.session_state.use_logger = False
if st.session_state.use_logger:
    level = st.selectbox(
        "Logging level",
        ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
    )
    # Translate the chosen name (e.g. "INFO") into logging's level constant
    # and apply it to the root logger.
    logging.getLogger().setLevel(getattr(logging, level))
# Default to the main page when the URL carries no "page" query parameter.
if "page" not in st.query_params:
    st.query_params.page = "main"

# Navigation handling: value of the "page" query parameter -> page script.
# Any other value (including "main") renders the landing page in the else
# branch below.
PAGE_SCRIPTS = {
    "topic": "pages/2_topic_questions.py",
    "chapter": "pages/1_chapter_questions.py",
    "inspect": "pages/3_inspect_pdf.py",
}
target_script = PAGE_SCRIPTS.get(st.query_params.page)
if target_script is not None:
    st.switch_page(target_script)
else:
    # Welcome message
    st.title("Welcome to Text2Test!")
    st.divider()
    st.markdown("""
Welcome! This app helps you transform your PDFs or texts into interactive study materials by generating meaningful questions.
You can either:
- Generate questions based on specific topics or keywords
- Generate questions from a selected chapter
Start by uploading your PDF file, then choose your preferred way to generate questions using the options below.
Let’s make studying smarter and more engaging!
""")
    st.divider()

    # Upload PDF file
    st.subheader("Upload your PDF file")
    upload_pdf()
    st.divider()

    # (Re)process when the PDF was flagged as changed, or when PDF bytes are
    # present in the session but no text has been extracted from them yet.
    if st.session_state.get("pdf_changed") or (
        st.session_state.get("full_text") is None
        and st.session_state.get("uploaded_pdf_bytes") is not None
    ):
        process_pdf()  # Extract text from PDF
        with st.spinner("Extracting information from the text. This can take up to 1 minute."):
            client, embedding_func = initialize_chromadb(EMBEDDING_MODEL, model_path)
            whole_text_collection = initialize_collection(client, embedding_func, "whole_text_chunks")
            # NOTE(review): "full_text" is read back from the session here and
            # would be None if process_pdf() did not store it — confirm that
            # update_collection() copes with that.
            update_collection(
                whole_text_collection,
                st.session_state.get("full_text"),
                max_words=200,
                min_words=100,
                overlap_sentences=3,
            )
        st.session_state["pdf_changed"] = False  # Reset flag after processing

    # Showing the file name and preview is best-effort: a failure is logged
    # through debug_log and does not stop the rest of the page from rendering.
    try:
        uploaded_pdf_name = st.session_state.get("uploaded_pdf_name")
        if uploaded_pdf_name:
            st.info(f"Uploaded PDF: {uploaded_pdf_name}")
            debug_log(f"book title: {uploaded_pdf_name}")
        show_pdf_preview()
    except Exception as e:
        debug_log(f"Error displaying PDF info or preview: {e}")

    # Main content buttons
    st.subheader("Generate Questions")
    st.write("Please choose an option to generate questions:")
    breaks(1)
    cols = st.columns(2)
    # NOTE(review): the HTML payload below is blank as found — confirm that no
    # markup (e.g. button styling) was lost from this string.
    st.html("""
""")
    with cols[0]:
        if st.button("Generate Questions on a Topic", key="main_topic"):
            # Record the destination in the query parameter and rerun; the
            # navigation lookup at the top of this block does the switch.
            st.query_params.page = "topic"
            st.rerun()
    with cols[1]:
        if st.button("Generate Questions from a Chapter", key="main_chapter"):
            st.query_params.page = "chapter"
            st.rerun()
# Sidebar footer: a divider is always drawn; the Word export is offered only
# when the session holds generated questions to download.
questions_to_download = st.session_state.get("questions_to_download")
with st.sidebar:
    st.markdown("---")  # Divider
    if questions_to_download:
        st.markdown("**Download Questions**")  # Section heading
        docx_file = create_docx_from_data(questions_to_download)
        st.download_button(
            label="📄 Download as Word (.docx)",
            data=docx_file,
            file_name="questions.docx",
            mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            # "ignore": the click downloads the file without rerunning the app.
            on_click="ignore",
        )