# ai-research-paper-explainer / streamlit_app.py
# Source: Hugging Face Space "adarshninja" — "Upload 20 files" (commit 0fda845, verified)
import os
import tempfile

import streamlit as st

from chunker.text_chunker import pages_to_chunks
from embeddings.embedder import Embedder
from embeddings.similarity import top_k_similar_chunks
from extractor.pdf_parser import extract_text_from_pdf
from llm.explainer import PaperExplainer
from llm.question_bank import QUESTION_BANK
from utils.context_builder import build_context
from utils.validators import is_text_based_pdf
# Page chrome: wide layout with a title banner and a short capability note.
st.set_page_config(page_title="AI Research Paper Explainer", layout="wide")
st.title("📄 AI Research Paper Explainer")
st.caption("Built with Hugging Face · CPU-only · No paid APIs")

# Inputs: a text-based PDF plus the analysis mode that selects the prompt
# from QUESTION_BANK (keys double as the selectbox options).
uploaded_file = st.file_uploader("Upload a text-based research paper (PDF)", type=["pdf"])
mode = st.selectbox("Choose analysis mode", options=list(QUESTION_BANK.keys()))
if uploaded_file:
    # Persist the upload to disk: the PDF parser takes a filesystem path,
    # not a file-like object. delete=False is required so the file survives
    # the `with` exit (Windows cannot reopen an open NamedTemporaryFile),
    # which means we are responsible for removing it ourselves below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_file.read())
        pdf_path = tmp.name

    try:
        with st.spinner("Extracting and analyzing paper..."):
            pages = extract_text_from_pdf(pdf_path)

            # Guard: empty extraction or scanned/image-only PDFs have no
            # usable text layer. (`not pages` also protects pages[0] below.)
            if not pages or not is_text_based_pdf(pages):
                st.error("❌ This appears to be a scanned PDF. Please upload a text-based paper.")
                st.stop()

            chunks = pages_to_chunks(pages)

            # Embed every chunk, then use the first page (title/abstract)
            # as the retrieval query for the most representative passages.
            embedder = Embedder()
            chunk_embeddings = embedder.embed(chunks)
            query_embedding = embedder.embed([pages[0]])

            top_chunks = top_k_similar_chunks(
                query_embedding,
                chunk_embeddings,
                chunks,
                k=5,
            )
            context = build_context(top_chunks)

            # Ask the LLM the question associated with the selected mode.
            explainer = PaperExplainer()
            question = QUESTION_BANK[mode]
            answer = explainer.explain(context, question)
    finally:
        # Previously the temp file was never removed, leaking one file per
        # upload/rerun. st.stop() raises internally, so this runs either way.
        os.unlink(pdf_path)

    st.subheader(f"🧠 Result — {mode.upper()}")
    st.write(answer)

    # Show the supporting evidence: each retrieved chunk with its
    # similarity score and a 500-character preview.
    with st.expander("🔍 Retrieved Context (Top-K Chunks)"):
        for i, c in enumerate(top_chunks, 1):
            st.markdown(f"**Chunk {i} (score: {c['score']:.2f})**")
            st.write(c["text"][:500])