\n\n"
"Do NOT include explanations. Keep each question short and clear."
)
prompt = [
{"role": "system", "content": "You are an experienced instructor who writes high-quality MCQs."},
{"role": "user", "content": instruction + "\n\nDocument:\n\n" + full_text}
]
return call_chat_completion(prompt, model=model, max_tokens=2200, temperature=0.3)
def answer_question(question: str, chunks: List[str], chunk_embs: List[List[float]], emb_model: str, gen_model: str, top_k: int = 4) -> str:
    """
    Retrieval-augmented answer: pick the top_k most relevant chunks and ask the
    model to answer using only that context.

    Args:
        question: The user's question.
        chunks: All text chunks of the document.
        chunk_embs: One embedding per entry of ``chunks``.
        emb_model: Embedding model name, forwarded to ``top_k_chunks``.
        gen_model: Chat model name, forwarded to ``call_chat_completion``.
        top_k: Number of chunks to retrieve (passed as ``k``).

    Returns:
        Whatever ``call_chat_completion`` returns for the assembled prompt.
    """
    selected_chunks, idx = top_k_chunks(question, chunks, chunk_embs, k=top_k, emb_model=emb_model)
    # The user message asks the model to cite 0-based chunk indexes, so every
    # chunk has to be shown together with its index; unlabeled text gives the
    # model nothing to cite.
    # NOTE(review): assumes `idx` holds the positions of `selected_chunks`
    # within `chunks`, in the same order - confirm against top_k_chunks.
    labelled_chunks = [f"[Chunk {position}]\n{text}" for position, text in zip(idx, selected_chunks)]
    context = "\n\n---\n\n".join(labelled_chunks)
    prompt = [
        {"role": "system", "content": "You are an assistant that answers questions using the provided context. If the answer is not in the context, say you could not find it."},
        {"role": "user", "content": f"Context:\n\n{context}\n\nQuestion: {question}\n\nAnswer concisely and cite which chunk indexes (0-based) you used."}
    ]
    return call_chat_completion(prompt, model=gen_model, max_tokens=400, temperature=0.2)
# -------------------------
# Download helpers
# -------------------------
def make_text_download(content: str, filename: str = "study_package.md"):
    """
    Build an HTML download link that embeds ``content`` as a base64 data URI.

    Args:
        content: Text to offer for download.
        filename: Name suggested to the browser and shown in the link label.

    Returns:
        An ``<a>`` element as a string. The caller renders it as raw HTML, so
        the filename is HTML-escaped here.
    """
    # Local import: the file's import block is not visible from this section.
    from html import escape

    b64 = base64.b64encode(content.encode("utf-8")).decode("ascii")
    # The filename can come from an uploaded file, so it must not be able to
    # break out of the attribute or inject markup.
    safe_name = escape(filename, quote=True)
    href = f'<a href="data:text/plain;charset=utf-8;base64,{b64}" download="{safe_name}">Download {safe_name}</a>'
    return href
# -------------------------
# Session state initialization
# -------------------------
# Give every per-session slot an initial value the first time the script runs
# for a session; slots that already exist are left untouched.
for slot_name, initial_value in (
    ("qa_history", []),  # list of dicts: question, answer, time
    ("summary", None),
    ("mcq_text", None),
    ("chunks", None),
    ("chunk_embeddings", None),
):
    if slot_name not in st.session_state:
        st.session_state[slot_name] = initial_value
# -------------------------
# App UI layout
# -------------------------
# Page heading and a one-line description of what the app offers.
st.title("📘 AI Study Assistant")
st.caption("Upload a PDF and generate a summary, 25+ MCQs, and interactively ask questions about the content.")
# Main layout: left column for upload + actions, right for results
column_weights = [1.4, 2]  # relative widths of the left and right columns
left_col, right_col = st.columns(column_weights)
# Upload panel: accept a PDF, extract its text and keep the text in session
# state under "full_text" for the generation and retrieval sections.
with left_col:
    st.header("Upload & Settings")
    uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"], help="Choose a PDF with selectable text for best results.")
    if uploaded_file:
        # getvalue() returns the whole buffer regardless of the current stream
        # position, whereas read() returns only what is left after the cursor.
        file_bytes = uploaded_file.getvalue()
        st.write(f"**Filename:** {uploaded_file.name} — {len(file_bytes)//1024} KB")
        # Try extracting text; only the extraction call itself is guarded so
        # unrelated UI errors are not reported as extraction failures.
        with st.spinner("Extracting text from PDF..."):
            try:
                full_text = extract_text(file_bytes)
            except Exception as e:
                st.error(f"Failed to extract text: {e}")
                st.stop()
        if not full_text or len(full_text.strip()) < 50:
            st.warning("Extracted text is short or empty. The PDF may be scanned images. Try another PDF or enable OCR.")
            # Drop text left over from an earlier upload so later steps cannot
            # silently operate on the wrong document.
            if "full_text" in st.session_state:
                del st.session_state["full_text"]
        else:
            st.success(f"Extracted {len(full_text.split())} words from PDF.")
            # Save in session
            st.session_state["full_text"] = full_text
    else:
        st.info("Please upload a PDF to enable summary and MCQ generation.")
# Action buttons: generate the summary, the MCQs, or both from the extracted
# text. Results are stored in session state under "summary" and "mcq_text".
with left_col:
    st.markdown("---")
    st.header("Generate Content")
    # Look the text up defensively: "full_text" is absent from session state
    # when extraction produced nothing usable, and indexing it directly would
    # surface as a cryptic "'full_text'" failure message.
    source_text = st.session_state.get("full_text") if uploaded_file else None
    missing_text_msg = "No usable text was extracted from this PDF."
    colA, colB = st.columns([1, 1])
    with colA:
        if st.button("Generate Summary"):
            if not uploaded_file:
                st.error("Upload a PDF first.")
            elif not source_text:
                st.error(missing_text_msg)
            else:
                try:
                    with st.spinner("Generating summary (OpenAI)..."):
                        ensure_openai_key()
                        # If document is very large, you might want to chunk and summarize iteratively.
                        summary_text = generate_summary(source_text, model=model_choice)
                        st.session_state["summary"] = summary_text
                    st.success("Summary generated.")
                except Exception as e:
                    st.error(f"Summary generation failed: {e}")
    with colB:
        if st.button(f"Generate {mcq_target} MCQs"):
            if not uploaded_file:
                st.error("Upload a PDF first.")
            elif not source_text:
                st.error(missing_text_msg)
            else:
                try:
                    with st.spinner("Generating MCQs (this may take a moment)..."):
                        ensure_openai_key()
                        mcq_text = generate_mcqs(source_text, model=model_choice, count=int(mcq_target))
                        st.session_state["mcq_text"] = mcq_text
                    st.success("MCQs generated.")
                except Exception as e:
                    st.error(f"MCQ generation failed: {e}")
    # Generate both
    if st.button("Generate Summary + MCQs"):
        if not uploaded_file:
            st.error("Upload a PDF first.")
        elif not source_text:
            st.error(missing_text_msg)
        else:
            try:
                with st.spinner("Generating summary + MCQs..."):
                    ensure_openai_key()
                    st.session_state["summary"] = generate_summary(source_text, model=model_choice)
                    st.session_state["mcq_text"] = generate_mcqs(source_text, model=model_choice, count=int(mcq_target))
                st.success("Summary and MCQs generated.")
            except Exception as e:
                st.error(f"Combined generation failed: {e}")
# Prepare retrieval infrastructure: split the document into chunks and compute
# one embedding per chunk, stored under "chunks" and "chunk_embeddings".
with left_col:
    # Require actual text, not merely the presence of the key.
    if uploaded_file and st.session_state.get("full_text"):
        if st.button("Prepare Q&A (create embeddings)"):
            try:
                with st.spinner("Chunking document and computing embeddings (costly operation)..."):
                    chunks = chunk_text(st.session_state["full_text"], words_per_chunk=int(chunk_size), overlap=int(chunk_overlap))
                    # Nothing to embed when chunking produced no chunks.
                    chunk_embs = get_embeddings(chunks, model=emb_model_choice) if len(chunks) > 0 else []
                if len(chunks) > 0:
                    # Commit both values together, and only after embedding
                    # succeeded, so the session never pairs new chunks with
                    # embeddings from an earlier run.
                    st.session_state["chunks"] = chunks
                    st.session_state["chunk_embeddings"] = chunk_embs
                    st.success(f"Prepared {len(chunks)} chunks and embeddings for retrieval.")
                else:
                    st.warning("The document produced no text chunks, so there is nothing to embed.")
            except Exception as e:
                st.error(f"Failed to prepare embeddings: {e}")
# Download / export: combine whatever has been generated so far (summary, MCQs,
# Q&A history) into one markdown document and offer it for download.
with left_col:
    st.markdown("---")
    st.header("Download / Export")
    st.markdown("After generating content, download a combined study package.")
    summary_md = st.session_state.get("summary")
    mcq_md = st.session_state.get("mcq_text")
    qa_log = st.session_state.get("qa_history")
    if summary_md or mcq_md or qa_log:
        # Compose markdown
        composed = []
        if summary_md:
            composed.append("# Summary\n\n" + summary_md + "\n\n")
        if mcq_md:
            composed.append("# MCQs\n\n" + mcq_md + "\n\n")
        if qa_log:
            qalist = ["# Q&A History\n"]
            qalist.extend(
                f"**Q:** {qa['question']}\n\n**A:** {qa['answer']}\n\n_Time:_ {qa['time']}\n\n"
                for qa in qa_log
            )
            composed.append("\n".join(qalist))
        package_md = "\n".join(composed)
        # Generated content lives in session state and can outlast the upload:
        # once the uploader is cleared, uploaded_file is None and reading
        # .name would raise AttributeError.
        base_name = uploaded_file.name if uploaded_file else "document"
        package_name = f"{base_name}_study_package.md"
        st.markdown(make_text_download(package_md, filename=package_name), unsafe_allow_html=True)
        st.download_button("Download study package (.md)", package_md, file_name=package_name, mime="text/markdown")
    else:
        st.info("No generated content yet. Run summary/MCQ generation first.")
# Right column: create the three result tabs, then fill the summary tab and the
# MCQ viewer. The MCQ CSV export and the Q&A tab are populated further down.
with right_col:
    # Tabs: Summary, MCQ Quiz, Q&A
    tab1, tab2, tab3 = st.tabs(["\U0001f4d1 Summary", "\U0001f4dd MCQ Quiz", "\u2753 Q&A Dashboard"])
    with tab1:
        st.header("Document Summary")
        if st.session_state.get("summary"):
            # The summary is model output derived from an uploaded document, so
            # it is rendered as ordinary markdown rather than as raw HTML. The
            # wrapper st.markdown("") calls rendered nothing and were removed.
            st.markdown(st.session_state["summary"])
        else:
            st.info("No summary yet. Click 'Generate Summary' in the left panel.")
    with tab2:
        st.header("Generated MCQs")
        if st.session_state.get("mcq_text"):
            # Show the raw MCQ text exactly as it was generated.
            st.text_area("MCQs (read-only)", value=st.session_state["mcq_text"], height=420, key="mcq_display")
def parse_mcqs_to_df(mcq_text: str) -> pd.DataFrame:
    """
    Parse generated MCQ text into a DataFrame with one row per question.

    Recognised line shapes (leading ``#`` and wrapping ``**`` are ignored):
      * question header: ``Question 3: text``, ``Question 3. text``,
        ``Q3: text``, ``Q. text``
      * option: ``A. text``, ``A) text``, ``(a) text`` for letters A-D in
        either case
      * answer: ``Answer: B`` or ``Correct answer: B``

    A line that follows a question header before any option has appeared is
    treated as a continuation of the question text, so a header that carries
    no text of its own ("Question 2:") is no longer dropped.

    Args:
        mcq_text: Raw MCQ text; ``None`` or empty yields an empty frame.

    Returns:
        DataFrame with columns ``question``, ``A``, ``B``, ``C``, ``D``,
        ``answer``.
    """
    # Local import: the file's import block is not visible from this section.
    import re

    columns = ["question", "A", "B", "C", "D", "answer"]
    # "Q" counts as a header only when followed by a digit or punctuation, so
    # ordinary words starting with "q" are not mistaken for a header.
    header_re = re.compile(r"^(?:question\b|q(?=\s*\d|\s*[:.)]))\s*\d*\s*[:.)\-]?\s*(.*)$", re.IGNORECASE)
    option_re = re.compile(r"^\(?([A-Da-d])[.)]\s*(.*)$")
    answer_re = re.compile(r"^(?:correct\s+)?answer[^:]*:\s*(.*)$", re.IGNORECASE)

    def _unbold(text: str) -> str:
        # Strip surrounding whitespace and wrapping markdown bold markers.
        text = text.strip()
        while text.startswith("**"):
            text = text[2:].lstrip()
        while text.endswith("**"):
            text = text[:-2].rstrip()
        return text

    rows = []
    current = None        # row being assembled, or None before the first header
    seen_option = False   # True once the current question has an option line

    def _flush() -> None:
        # Keep the pending row only if it carries a question or an option.
        if current is not None and any(current[name] for name in columns[:5]):
            rows.append(current)

    for raw_line in (mcq_text or "").splitlines():
        line = _unbold(raw_line.strip().lstrip("#"))
        if not line:
            continue

        header = header_re.match(line)
        if header:
            _flush()
            current = {"question": _unbold(header.group(1)), "A": "", "B": "", "C": "", "D": "", "answer": ""}
            seen_option = False
            continue
        if current is None:
            # Preamble before the first question header.
            continue

        option = option_re.match(line)
        if option:
            current[option.group(1).upper()] = _unbold(option.group(2))
            seen_option = True
            continue

        answer = answer_re.match(line)
        if answer:
            current["answer"] = _unbold(answer.group(1))
            continue
        if line.lower().startswith("answer"):
            # An answer line without a colon carries no parseable value.
            continue

        if not seen_option:
            # Question text continuing on (or starting on) the next line.
            current["question"] = (current["question"] + " " + line).strip()

    _flush()
    return pd.DataFrame(rows, columns=columns)
# Generated content lives in session state and can outlast the upload: once the
# uploader is cleared, uploaded_file is None and reading .name would raise
# AttributeError, so download names fall back to a generic base.
export_base_name = uploaded_file.name if uploaded_file else "document"

# MCQ tab (continued): offer the parsed questions as CSV, or a hint when no
# MCQs exist yet.
with tab2:
    if st.session_state.get("mcq_text"):
        # Also provide CSV download parsed
        df_mcq = parse_mcqs_to_df(st.session_state["mcq_text"])
        if not df_mcq.empty:
            st.download_button("Download MCQs as CSV", df_mcq.to_csv(index=False), file_name=f"{export_base_name}_mcqs.csv", mime="text/csv")
    else:
        st.info("No MCQs generated yet. Click 'Generate MCQs' in the left panel.")

# Q&A tab: answer questions against the prepared chunks and keep a history.
with tab3:
    st.header("Q&A Dashboard")
    st.markdown("Ask questions about the PDF. Use 'Prepare Q&A' first (computes embeddings).")
    question_input = st.text_input("Enter your question here:")
    if st.button("Ask question"):
        if not st.session_state.get("chunks") or not st.session_state.get("chunk_embeddings"):
            st.warning("Please click 'Prepare Q&A (create embeddings)' in the left panel first.")
        elif not question_input.strip():
            st.error("Please type a question.")
        else:
            try:
                with st.spinner("Retrieving context and generating answer..."):
                    ans = answer_question(question_input, st.session_state["chunks"], st.session_state["chunk_embeddings"], emb_model_choice, model_choice, top_k=int(retrieval_k))
                    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
                    st.session_state["qa_history"].append({"question": question_input, "answer": ans, "time": timestamp})
                st.success("Answer generated.")
            except Exception as e:
                st.error(f"Q&A failed: {e}")
    # Show history
    if st.session_state["qa_history"]:
        st.markdown("### Recent Q&A")
        # Newest first, limited to the last eight exchanges.
        for qa in reversed(st.session_state["qa_history"][-8:]):
            # Questions are user input and answers are model output, so they
            # are rendered as ordinary markdown rather than as raw HTML.
            st.markdown(f"**Q:** {qa['question']}\n\n**A:** {qa['answer']}\n\n_Time:_ {qa['time']}")
        # Download Q&A
        qa_md = "\n\n".join([f"Q: {qa['question']}\nA: {qa['answer']}\nTime: {qa['time']}" for qa in st.session_state["qa_history"]])
        st.download_button("Download Q&A history (.txt)", qa_md, file_name=f"{export_base_name}_qa_history.txt", mime="text/plain")
    else:
        st.info("No Q&A history yet.")
# -------------------------
# Footer
# -------------------------
# Footer: a horizontal rule followed by the one-line credit.
for footer_markdown in (
    "---",
    "Developed as **AI Study Assistant** — Upload a PDF, generate summary & MCQs, and ask questions!",
):
    st.markdown(footer_markdown)
# End of app.py