"""
# Welcome to Streamlit!
Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
If you have any questions, check out our [documentation](https://docs.streamlit.io) and [community
forums](https://discuss.streamlit.io).
In the meantime, below is an example of what you can do with just a few lines of code:
"""
import os
import re
import json
import requests
import datetime as dt
import streamlit as st
from openai import OpenAI
# ------------------------------
# Config & Clients
# ------------------------------
# # ==== 放在顶部某处(全局变量/工具)====
# DEBUG_MODE = st.sidebar.checkbox("Debug 模式(显示原始返回)", value=False)
# OpenAI-compatible client pointed at the AIML API endpoint; the key is read
# from the AIML_API_KEY environment variable.
client = OpenAI(
    base_url="https://api.aimlapi.com/v1",
    api_key=os.environ.get("AIML_API_KEY"),
)
# Model identifier, overridable through the AIML_MODEL environment variable.
MODEL = os.environ.get("AIML_MODEL", "gpt-5-2025-08-07")  # or gpt-5
st.set_page_config(page_title="LearnX5 Tutor — Learn & Review", layout="wide")
# Simplified persistence: reviews are kept in a local JSON file, so a Space
# restart loses the data. This could be moved to HF Datasets or an external database.
DATA_DIR = "./data"
REV_PATH = f"{DATA_DIR}/reviews.json"
os.makedirs(DATA_DIR, exist_ok=True)
# Seed the review store with an empty JSON list when the file does not exist yet.
if not os.path.exists(REV_PATH):
    with open(REV_PATH, "w", encoding="utf-8") as f:
        json.dump([], f, ensure_ascii=False, indent=2)
# ------------------------------
# Helpers
# ------------------------------
@st.cache_data(show_spinner=False)
def fetch_github_readme(owner_repo: str):
    """Fetch a repository's README.md through raw.githubusercontent.com.

    Args:
        owner_repo: Repository slug in "owner/repo" form.

    Returns:
        The text of the first candidate URL that answers with HTTP 200 and
        more than 32 characters, or "" when no candidate qualifies (including
        when every request fails at the network level).
    """
    owner_repo = (owner_repo or "").strip().strip("/")
    if not owner_repo:
        # Nothing to look up; avoid three pointless HTTP requests.
        return ""
    raw_candidates = [
        f"https://raw.githubusercontent.com/{owner_repo}/HEAD/README.md",
        f"https://raw.githubusercontent.com/{owner_repo}/main/README.md",
        f"https://raw.githubusercontent.com/{owner_repo}/master/README.md",
    ]
    for url in raw_candidates:
        try:
            r = requests.get(url, timeout=12)
        except requests.RequestException:
            # A timeout or connection error on one candidate must not crash
            # the page; move on to the next candidate.
            continue
        if r.status_code == 200 and len(r.text) > 32:
            return r.text
    return ""
def split_markdown_units(md: str, max_units: int = 12):
    """Cut a README into learning units at level-2 / level-3 headings.

    Each unit is a dict with "title" (the heading text, or "Section N" for a
    chunk that does not start with a ##/### heading) and "content" (the
    stripped chunk). At most ``max_units`` units are produced. An empty README
    gives a single placeholder unit, and a README that yields fewer than two
    units is returned whole as one "Overview" unit.
    """
    if not md:
        return [{"title": "README", "content": "(README not found)"}]

    # Split right before every line that opens with "## " or "### ".
    sections = re.split(r"\n(?=##\s)|\n(?=###\s)", md)

    collected = []
    for position, raw_section in enumerate(sections, start=1):
        body = raw_section.strip()
        heading = re.match(r"^(#{2,3})\s+(.+)", body)
        if heading:
            label = heading.group(2).strip()
        else:
            label = "Section " + str(position)
        collected.append({"title": label, "content": body})
        if len(collected) >= max_units:
            break

    # Too few pieces to be useful: present the whole document as one unit.
    if len(collected) < 2:
        return [{"title": "Overview", "content": md}]
    return collected
# def call_gpt_json(user_prompt: str, system_prompt: str = ""):
# rsp = client.responses.create(
# model=MODEL,
# response_format={"type": "json_object"},
# input=[
# {"role": "system", "content": system_prompt or "You are a helpful, expert learning coach."},
# {"role": "user", "content": user_prompt},
# ],
# )
# text = rsp.output_text
# try:
# return json.loads(text) if text else {}
# except Exception:
# return {"raw": text}
# def _coerce_json(text: str):
# # 尝试把返回内容里的 JSON 提取出来(兜底)
# try:
# return json.loads(text)
# except Exception:
# m = re.search(r"\{.*\}", text, flags=re.S)
# if m:
# try: return json.loads(m.group(0))
# except: pass
# return {"raw": text}
# def call_gpt_json(user_prompt: str, system_prompt: str = ""):
# # 1) 尝试 Responses API(如果 Aimlapi/SDK 支持)
# try:
# rsp = client.responses.create(
# model=MODEL,
# input=[
# {"role": "system", "content": system_prompt or "You are a helpful coach."},
# {"role": "user", "content": user_prompt},
# ],
# # 某些环境不支持这个参数,就会抛 TypeError
# response_format={"type": "json_object"},
# )
# text = getattr(rsp, "output_text", None) or json.dumps(rsp, ensure_ascii=False)
# return json.loads(text)
# except TypeError:
# # 2) 降级到 Chat Completions JSON 模式
# try:
# rsp = client.chat.completions.create(
# model=MODEL,
# messages=[
# {"role": "system", "content": system_prompt or "You are a helpful coach."},
# {"role": "user", "content": user_prompt},
# ],
# response_format={"type": "json_object"},
# temperature=0.2,
# )
# text = rsp.choices[0].message.content
# return json.loads(text)
# except Exception as e2:
# # 3) 最后兜底:让模型仅输出 JSON,然后手动解析
# prompt = (
# "Return a STRICT JSON object only. No prose, no code fences.\n\n" + user_prompt
# )
# rsp = client.chat.completions.create(
# model=MODEL,
# messages=[
# {"role": "system", "content": (system_prompt or "You are a helpful coach.") + " Output must be a SINGLE JSON object."},
# {"role": "user", "content": prompt},
# ],
# temperature=0.1,
# )
# text = rsp.choices[0].message.content
# return _coerce_json(text)
# def call_gpt_text(messages):
# rsp = client.responses.create(model=MODEL, input=messages)
# return rsp.output_text
# def call_gpt_text(messages):
# # 优先 Responses;不行就用 Chat Completions
# try:
# rsp = client.responses.create(
# model=MODEL,
# input=messages, # [{"role": "...", "content": "..."}]
# )
# return rsp.output_text
# except TypeError:
# rsp = client.chat.completions.create(
# model=MODEL,
# messages=messages,
# temperature=0.3,
# )
# return rsp.choices[0].message.content
def _extract_text_from_responses_obj(rsp):
"""
Compatible with different Responses implementations:
- rsp.output_text
- rsp.output[*].content[*].text
- rsp.choices[*].message.content(Some compatibility layers directly return the chat structure)
- dict/json (scenarios)
"""
# 1) SDK Object might output_text
text = getattr(rsp, "output_text", None)
if text:
return text
# 2) SDK Object might be convertible to dict
try:
d = rsp if isinstance(rsp, dict) else rsp.model_dump()
except Exception:
try:
d = json.loads(str(rsp))
except Exception:
d = None
if isinstance(d, dict):
# 2a) Standard Responses Tree
out = d.get("output") or d.get("response") or {}
# Classical shape:{"output":[{"content":[{"type":"output_text","text":"..."}]}]}
if isinstance(out, list) and out:
content = out[0].get("content") if isinstance(out[0], dict) else None
if isinstance(content, list):
# Found text
for c in content:
if isinstance(c, dict):
if "text" in c and c["text"]:
return c["text"]
if c.get("type") in ("output_text","text") and c.get("text"):
return c["text"]
# 2b) Some compatibility layers return chat structure directly
choices = d.get("choices")
if isinstance(choices, list) and choices:
msg = choices[0].get("message", {})
if isinstance(msg, dict) and msg.get("content"):
return msg["content"]
# 2c) Some layers put the main text in the top-level text/message
for key in ("text","message","content"):
if isinstance(d.get(key), str) and d[key].strip():
return d[key]
# 3) If all else fails, return an empty string
return ""
def _coerce_json(text: str):
"""Try to parse the model output as JSON; otherwise, enclose it in {"raw": "..." }"""
if not text or not str(text).strip():
return {"raw": ""}
try:
return json.loads(text)
except Exception:
m = re.search(r"\{.*\}", str(text), flags=re.S)
if m:
try: return json.loads(m.group(0))
except: pass
return {"raw": str(text).strip()}
def _as_dict(obj):
"""Try to turn SDK response into dict safely"""
if isinstance(obj, dict):
return obj
for attr in ("model_dump", "to_dict", "dict"):
if hasattr(obj, attr):
try:
return getattr(obj, attr)()
except Exception:
pass
try:
return json.loads(str(obj))
except Exception:
return None
# def call_gpt_json(user_prompt: str, system_prompt: str = ""):
# """
# Priority Responses; If unavailable, automatically downgrade to Chat.
# No longer heavily rely on response_format; use prompt + fallback parsing to ensure JSON format.
# """
# sys = (system_prompt or "You are a helpful coach.") + \
# " Output MUST be a single valid JSON object. No prose, no code fences."
# # 1) try responses
# try:
# rsp = client.responses.create(
# model=MODEL,
# input=[
# {"role": "system", "content": sys},
# {"role": "user", "content": user_prompt},
# ],
# # temperature=0.2,
# )
# raw_dict = _as_dict(rsp) or str(rsp)
# st.write("🧾 Raw response (dict) →", raw_dict) # 页面查看
# print("RAW:", json.dumps(raw_dict, ensure_ascii=False) if isinstance(raw_dict, dict) else raw_dict)
# text = _extract_text_from_responses_obj(rsp)
# # Debug:Display/print the parsed original text
# st.write("🔎 Debug -Mentioned text:", text)
# print("DEBUG text:", repr(text)) # For command line visibility
# data = _coerce_json(text)
# # Debug:View the JSON structure
# st.write("🔎 Debug - JSON 数据:", data)
# print("DEBUG data:", data)
# # For easy debugging: Store the original response in the session.
# st.session_state._last_api_json = getattr(rsp, "model_dump", lambda: str(rsp))()
# return data
# except TypeError:
# # Some implementations do not support responses, fallback to chat
# pass
# except Exception as e:
# # Other exceptions try chat
# st.info(f"Responses call exception, switching to chat: {e}")
# # 2) chat.completions
# rsp = client.chat.completions.create(
# model=MODEL,
# messages=[
# {"role": "system", "content": sys},
# {"role": "user", "content": user_prompt},
# ],
# temperature=0.2,
# )
# text = rsp.choices[0].message.content if rsp.choices else ""
# st.session_state._last_api_json = rsp.model_dump() if hasattr(rsp, "model_dump") else rsp
# return _coerce_json(text)
def call_gpt_json(user_prompt: str, system_prompt: str = ""):
    """Ask the chat model for a JSON object and return it parsed.

    The system prompt (a default coach persona when empty) is extended with an
    instruction to answer with a single JSON object. The raw response is kept
    in ``st.session_state._last_api_json`` for debugging.

    Args:
        user_prompt: The user message.
        system_prompt: Optional system message prefix.

    Returns:
        The result of ``_coerce_json`` on the reply text, or ``{"raw": ""}``
        when the request fails (an error is shown in the app in that case).
    """
    system_text = (system_prompt or "You are a helpful coach.") + \
        " Output MUST be a single valid JSON object. No prose, no code fences."
    try:
        rsp = client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": system_text},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.2,
        )
        raw = rsp.model_dump() if hasattr(rsp, "model_dump") else rsp
        st.session_state._last_api_json = raw
        text = rsp.choices[0].message.content if rsp.choices else ""
        return _coerce_json(text)
    except Exception as e:
        # Best-effort UI boundary: never let a failed call crash the page, but
        # only blame the rate limit when the API actually answered HTTP 429.
        if getattr(e, "status_code", None) == 429:
            st.error("API key is limited, please try again later (10 requests for one hour)")
        else:
            st.error(f"Model request failed: {e}")
        return {"raw": ""}
def call_gpt_text(messages):
    """Return a plain-text answer for a list of chat messages.

    The Responses endpoint is tried first; if that call raises for any reason
    the request is repeated through Chat Completions. Whichever raw reply was
    received last is stored in ``st.session_state._last_api_json``.

    messages: [{"role": "...", "content": "..."}]
    """
    try:
        reply = client.responses.create(model=MODEL, input=messages, temperature=0.3)
        st.session_state._last_api_json = getattr(reply, "model_dump", lambda: str(reply))()
        return _extract_text_from_responses_obj(reply)
    except Exception:
        # Deliberate silent downgrade to the chat endpoint below.
        pass

    reply = client.chat.completions.create(model=MODEL, messages=messages, temperature=0.3)
    if hasattr(reply, "model_dump"):
        st.session_state._last_api_json = reply.model_dump()
    else:
        st.session_state._last_api_json = reply
    if reply.choices:
        return reply.choices[0].message.content
    return ""
def save_review(item):
    """Put ``item`` at the front of the JSON list stored at REV_PATH."""
    with open(REV_PATH, "r", encoding="utf-8") as source:
        stored = json.load(source)

    # Newest entry first.
    stored.insert(0, item)

    with open(REV_PATH, "w", encoding="utf-8") as target:
        json.dump(stored, target, ensure_ascii=False, indent=2)
def load_reviews():
    """Return the list of saved review items stored at REV_PATH.

    Deliberately not wrapped in ``st.cache_data``: the function takes no
    arguments, so a cached result would never be refreshed and reviews written
    later by ``save_review`` would not appear until the cache was cleared.

    Returns:
        The stored list, or [] when the file is missing, is not valid JSON, or
        does not contain a list.
    """
    try:
        with open(REV_PATH, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        # Read-only view: an unreadable store is shown as "no reviews".
        return []
    return data if isinstance(data, list) else []
# ------------------------------
# Sidebar: Survey
# ------------------------------
# Sidebar questionnaire: collects the learner profile and, on button click,
# asks the model for repository recommendations plus a 30-day plan.
# Widgets are created in display order, so statement order matters here.
st.sidebar.header("🎯 Initial")
persona = st.sidebar.multiselect(
    "Who are you(multiple)",
    ["Beginner", "University Student", "High School Student", "Career Changer", "IT Professional", "Data Analyst", "Researcher", "Other"],
)
interests = st.sidebar.multiselect(
    "Areas of Interest(multiple)",
    [
        "Java", "Python", "C++", "C", "C#", "Front-end (HTML/CSS/JS)", "Back-end (Node/Java/.NET)",
        "Data Analysis", "Machine Learning", "AI Agent", "DevOps", "Cloud (Azure/AWS/GCP)"
    ],
)
goals = st.sidebar.multiselect(
    "Your Goals(multiple)",
    ["Expand Employment Skills", "Data Analysis", "Academic Research", "Further Education/Employment", "Hobbies", "LeetCode/Algorithms", "System Design"],
)
with st.sidebar.expander("study time"):
    time_mode = st.radio("Frequency", ["per day", "per week"], horizontal=True)
    hours = st.number_input("Average Study Time (hours)", min_value=0.5, max_value=100.0, value=10.0, step=0.5)
# Make sure the profile slot exists before anything reads it.
if "profile" not in st.session_state:
    st.session_state.profile = {}
if st.sidebar.button("🧠 generate the plan", use_container_width=True):
    # Snapshot the current widget values as the learner profile.
    profile = {
        "persona": persona,
        "interests": interests,
        "goals": goals,
        "time": {"mode": time_mode, "hours": hours},
    }
    st.session_state.profile = profile
    # 1) Ask GPT to recommend GitHub ebooks/repos plus a 30-day study plan (JSON).
    user = (
        f"Persona: {', '.join(persona) or '(none)'}\n"
        f"Interests: {', '.join(interests) or '(none)'}\n"
        f"Goals: {', '.join(goals) or '(none)'}\n"
        f"Time: {time_mode} ~ {hours} hours\n"
    )
    sys = "You are a senior curriculum designer. Recommend high-quality, actively maintained GitHub repos (1-3) that match the user's profile (prefer star>1k, clear README). Then design a 30-day plan."
    # The prompt spells out the expected JSON shape: "repos" and "plan30".
    prompt = (
        "Based on the user profile below, return JSON with fields:\n" \
        "{\n \"repos\": [ { \"name\": string, \"url\": string, \"why\": string } ],\n \"plan30\": [ { \"dayRange\": string, \"milestones\": string[] } ]\n}\n\n" \
        "Profile:\n" + user +
        "Constraints:\n- repos <= 3, concise reasons.\n- Use free resources only.\n- Plan considers the user's time budget."
    )
    with st.spinner("In the suggestions generated for GPT-5..."):
        out = call_gpt_json(prompt, sys)
    # Stored for the Study tab, which renders the recommendation cards from it.
    st.session_state.reco = out
# if st.checkbox("Display original return (debug)"):
# st.write(st.session_state.get("_last_api_json"))
# ------------------------------
# Main Tabs
# ------------------------------
# Page title and the two top-level tabs (study / review) used below.
st.title("📚 LearnX5 Tutor — Study / Review")
tab1, tab2 = st.tabs(["Study Module", "Review Module"])
# ------------------------------
# Study Module
# ------------------------------
with tab1:
    # Study tab: repository cards -> unit picker -> README excerpt with Q&A ->
    # summary saved to the review store.
    st.subheader("Recommended Repositories and Learning Units")

    def normalize_repo(s: str):
        """Reduce a GitHub URL to its "owner/repo" slug; other input is returned stripped."""
        if not s:
            return ""
        m = re.search(r"github\.com/([\w.-]+/[\w.-]+)", s)
        slug = m.group(1) if m else s.strip()
        # Clone URLs end in ".git", which is not part of the repository name.
        if slug.endswith(".git"):
            slug = slug[:-4]
        return slug

    # Second name of the same helper, kept so code using either name works.
    normalize_repo_url = normalize_repo

    # ---- Recommendation cards (replaces the former raw st.json dump) ----
    plan = st.session_state.get("reco")
    repos = plan.get("repos") if isinstance(plan, dict) else None
    if isinstance(repos, list) and repos:
        st.subheader("📚 Recommended Repositories")
        for i, repo in enumerate(repos):
            if not isinstance(repo, dict):
                # Model output is untrusted; skip malformed entries.
                continue
            name = repo.get("name", f"repo-{i}")
            url = str(repo.get("url") or "")
            why = repo.get("why", "")
            with st.container(border=True):
                top_l, top_r = st.columns([6, 1])
                with top_l:
                    st.markdown(f"### {name}")
                    st.write(why)
                    st.caption(url)
                with top_r:
                    # View button: prefer link_button, degrade to a plain
                    # hyperlink when it is unavailable. No `key` is passed:
                    # link_button does not accept one, and passing it forced
                    # the fallback on every run.
                    try:
                        st.link_button("View ↗", url, use_container_width=True)
                    except Exception:
                        st.markdown(f"[View ↗]({url})")
                    # Choose button: select this repository and prepare its units.
                    if st.button("Choose ✅", key=f"choose_{i}", use_container_width=True):
                        owner_repo = normalize_repo(url) or name
                        with st.spinner("Fetching README & splitting units…"):
                            md = fetch_github_readme(owner_repo)
                            st.session_state.repo = {"name": owner_repo, "readme": md}
                            st.session_state.units = split_markdown_units(md)
                        st.success(f"Selected **{owner_repo}** · Split into {len(st.session_state.units)} units")
                        # Remember the selection so other UI can display it.
                        st.session_state["last_chosen_repo"] = owner_repo
    else:
        # Shown both before any plan exists and when the plan has no usable repos.
        st.info("Fill out the questionnaire on the left and click **Generate** to get repository cards.")

    # ---- Unit viewer + Q&A ----
    if "units" in st.session_state and st.session_state.units:
        unit_titles = [u["title"] for u in st.session_state.units]
        idx = st.selectbox("Select Study Unit", list(range(len(unit_titles))), format_func=lambda i: unit_titles[i])
        unit = st.session_state.units[idx]
        repo_state = st.session_state.get("repo") or {}
        left, right = st.columns([1.4, 1])
        with left:
            st.markdown(f"### 📖 {unit['title']}")
            readme = repo_state.get("readme") or ""
            # Very short units show the start of the README instead, unless
            # the README is empty, in which case the unit's own placeholder
            # text is more useful than a blank panel.
            if readme and len(unit["content"]) < 200:
                body = readme[:2000]
            else:
                body = unit["content"]
            # Rendered with unsafe_allow_html so inline HTML in the README
            # (e.g. line-break tags) takes effect.
            # NOTE(review): this renders remote, untrusted README content as
            # HTML; confirm this is acceptable for the deployment.
            st.markdown(body, unsafe_allow_html=True)
            st.caption("The left side displays the warehouse knowledge of this unit (split by the sections of the README).")
        with right:
            st.markdown("### 🤖 GPT‑5 Q&A")
            if "chat" not in st.session_state:
                st.session_state.chat = []  # [{q, a, unit}]
            # Simple chat input
            q = st.text_area("Your Question", height=120, placeholder="Please provide explanations/examples/practice suggestions based on the content on the left...")
            if st.button("Send Question", disabled=not q):
                # System prompt: treat the unit text as the known context and
                # ask for LeetCode / knowledge-base practice suggestions.
                system = (
                    "You are a patient, expert CS tutor. Use ONLY the given repo context as known facts when answering; "
                    "if missing, say what is missing and suggest how to find it in the repo. Provide step-by-step guidance. "
                    "After answering, suggest 1-3 relevant LeetCode topics/problems or reputable knowledge-base articles for practice."
                )
                repo_ctx = unit["content"][:6000]
                messages = [
                    {"role": "system", "content": system},
                    {"role": "user", "content": f"Repo context (excerpt):\n\n{repo_ctx}\n\nQuestion: {q}"},
                ]
                with st.spinner("GPT‑5 Thinking…"):
                    a = call_gpt_text(messages)
                st.session_state.chat.append({"q": q, "a": a, "unit": unit["title"]})
            # Conversation display: the last 8 turns, newest first.
            for turn in reversed(st.session_state.chat[-8:]):
                st.markdown(f"**You:** {turn['q']}")
                st.markdown(f"**LearnX5:** {turn['a']}")
                st.markdown("---")
            # Completing study: generate a summary and save it to the review module.
            if st.button("✅ Complete Study Unit (Generate Summary and Save)", use_container_width=True):
                # Only the conversation turns recorded for the current unit.
                related = [t for t in st.session_state.chat if t["unit"] == unit["title"]]
                qa_text = "\n\n".join(f"Q: {t['q']}\nA: {t['a']}" for t in related)
                summary_prompt = (
                    "Summarize the key takeaways from this study unit. Use the repo context and EMPHASIZE topics covered in the Q&A. "
                    "Return JSON: {\n \"unit\": string, \"summary\": string, \"keyPoints\": string[], \"followUps\": string[]\n}"
                )
                with st.spinner("Generating study summary…"):
                    j = call_gpt_json(
                        user_prompt=(
                            f"Repo unit title: {unit['title']}\n\nRepo context (excerpt):\n{unit['content'][:6000]}\n\nQ&A:\n{qa_text[:6000]}\n\n"
                            + summary_prompt
                        ),
                        system_prompt="You are a precise note-taker for spaced repetition.",
                    )
                if not isinstance(j, dict):
                    j = {}
                if not (j.get("summary") or j.get("keyPoints")):
                    # A failed or empty generation must not be archived as a review.
                    st.warning("Summary generation failed; nothing was saved.")
                else:
                    item = {
                        # UTC timestamp in the same "...Z" format as before,
                        # without the deprecated datetime.utcnow().
                        "ts": dt.datetime.now(dt.timezone.utc).replace(tzinfo=None).isoformat() + "Z",
                        "repo": repo_state.get("name"),
                        "unit": j.get("unit", unit["title"]),
                        "summary": j.get("summary", ""),
                        "keyPoints": j.get("keyPoints", []),
                        "followUps": j.get("followUps", []),
                    }
                    save_review(item)
                    st.success("Saved to review module ✅")
# ------------------------------
# Review Module
# ------------------------------
with tab2:
    # Review tab: lists every saved study summary in stored order.
    st.subheader("🗂️ Study Summary Archive")

    def _bullet_lines(values):
        """Format values as "• x" lines separated by Markdown hard breaks."""
        if isinstance(values, str):
            values = [values]
        # Two trailing spaces force a line break; a bare "\n" would merge all
        # bullets into a single paragraph when rendered as Markdown.
        return "  \n".join(f"• {x}" for x in values)

    reviews = load_reviews()
    if not reviews:
        st.info("No summaries available yet. Please save one after completing a study unit.")
    else:
        for r in reviews:
            if not isinstance(r, dict):
                # Skip malformed entries rather than failing the whole tab.
                continue
            with st.container(border=True):
                st.markdown(f"**Time**: {r.get('ts','-')} | **Repository**: {r.get('repo','-')} | **Unit**: {r.get('unit','-')}")
                if r.get("summary"):
                    st.markdown(f"**Summary**: {r['summary']}")
                if r.get("keyPoints"):
                    st.markdown("**Key Points**:")
                    st.write(_bullet_lines(r["keyPoints"]))
                if r.get("followUps"):
                    st.markdown("**Follow-up Suggestions**:")
                    st.write(_bullet_lines(r["followUps"]))