"""Streamlit app: upload a text-based research-paper PDF and get an LLM explanation.

Pipeline: extract page text -> validate it is text-based (not scanned) ->
chunk -> embed -> retrieve top-k chunks similar to the first page ->
build a context string -> ask the LLM a mode-specific question.
"""
import os
import tempfile

import streamlit as st

from chunker.text_chunker import pages_to_chunks
from embeddings.embedder import Embedder
from embeddings.similarity import top_k_similar_chunks
from extractor.pdf_parser import extract_text_from_pdf
from llm.explainer import PaperExplainer
from llm.question_bank import QUESTION_BANK
from utils.context_builder import build_context
from utils.validators import is_text_based_pdf

st.set_page_config(page_title="AI Research Paper Explainer", layout="wide")
# NOTE(review): the original emoji in these strings were mojibake ("π", "Β·");
# replaced with clean text / a plausible emoji — confirm against the intended UI.
st.title("📄 AI Research Paper Explainer")
st.caption("Built with Hugging Face · CPU-only · No paid APIs")

uploaded_file = st.file_uploader(
    "Upload a text-based research paper (PDF)",
    type=["pdf"],
)
mode = st.selectbox(
    "Choose analysis mode",
    options=list(QUESTION_BANK.keys()),
)


@st.cache_resource
def _load_embedder() -> Embedder:
    """Load the embedding model once per server process (model load is slow)."""
    return Embedder()


@st.cache_resource
def _load_explainer() -> PaperExplainer:
    """Load the LLM explainer once per server process."""
    return PaperExplainer()


if uploaded_file:
    # Persist the upload to disk because the extractor expects a file path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_file.read())
        pdf_path = tmp.name

    try:
        with st.spinner("Extracting and analyzing paper..."):
            pages = extract_text_from_pdf(pdf_path)

            if not is_text_based_pdf(pages):
                st.error("This appears to be a scanned PDF. Please upload a text-based paper.")
                st.stop()

            chunks = pages_to_chunks(pages)
            if not chunks:
                # Guard: embedding an empty chunk list would fail downstream.
                st.error("No extractable text was found in this PDF.")
                st.stop()

            embedder = _load_embedder()
            chunk_embeddings = embedder.embed(chunks)
            # The first page (title/abstract) serves as the retrieval query.
            query_embedding = embedder.embed([pages[0]])

            top_chunks = top_k_similar_chunks(
                query_embedding,
                chunk_embeddings,
                chunks,
                k=5,
            )
            context = build_context(top_chunks)

            explainer = _load_explainer()
            question = QUESTION_BANK[mode]
            answer = explainer.explain(context, question)

        st.subheader(f"Result — {mode.upper()}")
        st.write(answer)

        with st.expander("Retrieved Context (Top-K Chunks)"):
            for i, chunk in enumerate(top_chunks, 1):
                st.markdown(f"**Chunk {i} (score: {chunk['score']:.2f})**")
                st.write(chunk["text"][:500])
    finally:
        # delete=False above means the temp file leaks unless removed explicitly.
        # st.stop() raises internally, so finally still runs on early exits.
        os.unlink(pdf_path)