Spaces:
Paused
Paused
| import os | |
| import re | |
| import json | |
| import html | |
| import pickle | |
| import sqlite3 | |
| from datetime import datetime | |
| from typing import Dict, List, Any, Optional, Tuple | |
| import numpy as np | |
| import pandas as pd | |
| import streamlit as st | |
| import plotly.express as px | |
| from openai import OpenAI | |
| from rank_bm25 import BM25Okapi | |
| from sentence_transformers import SentenceTransformer | |
# =====================================================
# CONFIGURATION
# =====================================================
# Application title: passed to st.set_page_config and shown by render_header().
APP_TITLE = "BrainChat PMQSN"
# =====================================================
# PATH CONFIGURATION FOR HUGGING FACE STREAMLIT TEMPLATE
# Your files are inside the src/ folder, so all paths start from src.
# =====================================================
# Relative path, so it is resolved against the working directory of the process.
BASE_DIR = "src"
# Retrieval build artefacts; all four are read by load_rag_resources().
BUILD_DIR = os.path.join(BASE_DIR, "brainchat_build")
CHUNKS_PATH = os.path.join(BUILD_DIR, "chunks.pkl")
TOKENS_PATH = os.path.join(BUILD_DIR, "tokenized_chunks.pkl")
EMBED_PATH = os.path.join(BUILD_DIR, "embeddings.npy")
CONFIG_PATH = os.path.join(BUILD_DIR, "config.json")
# JSON question bank read by load_question_bank().
QUESTION_BANK_FILE = os.path.join(BASE_DIR, "exam_questions_pmqs.json")
# Optional logo; render_header() falls back to an emoji when the file is absent.
LOGO_FILE = os.path.join(BASE_DIR, "logo.png")
# Database is kept in root by default.
# The BRAINCHAT_DB environment variable overrides the SQLite file location.
DB_PATH = os.getenv("BRAINCHAT_DB", "brainchat.db")
# Chat-completions model name; overridable through the OPENAI_MODEL environment variable.
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
# NOTE(review): a hard-coded fallback password ships with the source; presumably it
# guards the teacher dashboard (the check is not in this part of the file). Consider
# requiring the TEACHER_PASSWORD environment variable to be set instead.
TEACHER_PASSWORD = os.getenv("TEACHER_PASSWORD", "teacher123")
# Quiz topics offered in the sidebar. badges_for_student() iterates this list and
# detect_topic() returns these exact names, so the spellings must stay in sync.
TOPICS = [
    "Stroke / Cerebrovascular",
    "Epilepsy",
    "Headache",
    "Multiple Sclerosis / Demyelination",
    "Parkinson / Movement Disorders",
    "Dementia",
    "Neuropathy / Neuromuscular",
    "Neuroanatomy / Topography",
    "General Neurology",
]
# Interface strings keyed first by language name and then by message key.
# t() looks a key up under the language stored in st.session_state["language"].
# Both language tables define the same set of keys.
TRANSLATIONS = {
    "English": {
        "app_subtitle": "AI tutor and quiz platform for Neurology / PMQSN learning",
        "language": "Interface language",
        "mode": "Choose mode",
        "student_mode": "Student Mode",
        "teacher_mode": "Teacher Mode",
        "student_id": "Student ID",
        "topic": "Choose topic for quiz practice",
        "difficulty": "Difficulty level",
        "num_questions": "Number of MCQ questions",
        "start_quiz": "Generate quiz",
        "submit_quiz": "Submit quiz",
        "chat": "Tutor Chat",
        "quiz": "Topic Quiz",
        "report": "Learning Report",
        "teacher_password": "Teacher password",
        "login": "Open teacher dashboard",
        "download_html": "Download HTML report",
        "no_data": "No student data available yet.",
        "score": "Score",
        "weak_areas": "Weak areas",
        "badges": "Badges earned",
        "confidence_green": "Green = high confidence. The answer is strongly supported by course sources.",
        "confidence_orange": "Orange = medium confidence. The answer is partly supported but should be revised.",
        "confidence_red": "Red = low confidence. The system found weak or insufficient support.",
        "similarity_help": "Similarity is a value from 0 to 1. A higher value means the retrieved course material is closer to the question.",
        "badge_help": "Badges are earned from quiz marks only: Bronze = 70% or above in 2 quizzes of the same topic; Silver = 80% or above in 3 quizzes; Gold = 90% or above in 5 quizzes. General badges are also awarded for overall learning consistency.",
        "ask_question": "Write your free question here",
        "send": "Ask BrainChat",
        "student_tip": "Student Mode stores your quiz attempts, scores, weak topics, and badge progress.",
        "teacher_tip": "Teacher Mode shows class progress, student-wise performance, and topic-wise weak areas.",
        "explain_logic": "How the system measures progress",
        "saved": "Attempt saved successfully.",
    },
    "Spanish": {
        "app_subtitle": "Tutor de IA y plataforma de cuestionarios para Neurología / PMQSN",
        "language": "Idioma de la interfaz",
        "mode": "Elegir modo",
        "student_mode": "Modo estudiante",
        "teacher_mode": "Modo profesor",
        "student_id": "ID del estudiante",
        "topic": "Elegir tema para el cuestionario",
        "difficulty": "Nivel de dificultad",
        "num_questions": "Número de preguntas tipo test",
        "start_quiz": "Generar cuestionario",
        "submit_quiz": "Enviar cuestionario",
        "chat": "Tutor Chat",
        "quiz": "Cuestionario por tema",
        "report": "Informe de aprendizaje",
        "teacher_password": "Contraseña del profesor",
        "login": "Abrir panel del profesor",
        "download_html": "Descargar informe HTML",
        "no_data": "Todavía no hay datos de estudiantes.",
        "score": "Puntuación",
        "weak_areas": "Áreas débiles",
        "badges": "Insignias obtenidas",
        "confidence_green": "Verde = alta confianza. La respuesta está bien apoyada por las fuentes del curso.",
        "confidence_orange": "Naranja = confianza media. La respuesta tiene apoyo parcial y debe revisarse.",
        "confidence_red": "Rojo = baja confianza. El sistema encontró apoyo débil o insuficiente.",
        "similarity_help": "La similitud es un valor de 0 a 1. Un valor más alto significa que el material recuperado se parece más a la pregunta.",
        "badge_help": "Las insignias se obtienen solo con las notas del cuestionario: Bronce = 70% o más en 2 cuestionarios del mismo tema; Plata = 80% o más en 3 cuestionarios; Oro = 90% o más en 5 cuestionarios. También hay insignias generales por constancia.",
        "ask_question": "Escribe aquí tu pregunta libre",
        "send": "Preguntar a BrainChat",
        "student_tip": "El modo estudiante guarda intentos, puntuaciones, temas débiles y progreso de insignias.",
        "teacher_tip": "El modo profesor muestra progreso de clase, rendimiento por estudiante y áreas débiles por tema.",
        "explain_logic": "Cómo el sistema mide el progreso",
        "saved": "Intento guardado correctamente.",
    },
}
# =====================================================
# PAGE SETUP
# =====================================================
# Runs at import time: sets the page title, the page icon and the wide layout.
st.set_page_config(page_title=APP_TITLE, page_icon="🧠", layout="wide")
| # ===================================================== | |
| # DATABASE | |
| # ===================================================== | |
def get_conn():
    """Open a new SQLite connection to the database file at ``DB_PATH``.

    The connection is created with ``check_same_thread=False``. The caller
    owns the returned connection and is responsible for closing it.
    """
    connection = sqlite3.connect(DB_PATH, check_same_thread=False)
    return connection
def init_db():
    """Create the ``students``, ``quiz_attempts`` and ``chat_logs`` tables.

    Every statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this on an
    already initialised database changes nothing. The connection is closed even
    when a statement fails, so an error cannot leak an open database handle.
    """
    schema_statements = (
        """
        CREATE TABLE IF NOT EXISTS students (
            student_id TEXT PRIMARY KEY,
            name TEXT,
            language TEXT,
            created_at TEXT
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS quiz_attempts (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            student_id TEXT,
            student_name TEXT,
            language TEXT,
            topic TEXT,
            difficulty TEXT,
            score INTEGER,
            total INTEGER,
            percent REAL,
            confidence_color TEXT,
            weak_areas TEXT,
            badges TEXT,
            quiz_json TEXT,
            answers_json TEXT,
            created_at TEXT
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS chat_logs (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            student_id TEXT,
            language TEXT,
            topic TEXT,
            question TEXT,
            answer TEXT,
            confidence_color TEXT,
            similarity REAL,
            created_at TEXT
        )
        """,
    )
    conn = get_conn()
    try:
        cur = conn.cursor()
        for statement in schema_statements:
            cur.execute(statement)
        conn.commit()
    finally:
        # Always release the handle, including when a statement raised.
        conn.close()
def upsert_student(student_id: str, name: str, language: str):
    """Insert a student row, or refresh its name and language if it exists.

    A missing or blank ``student_id`` is stored as ``"Guest"``; a missing or
    blank ``name`` falls back to the (normalised) student id. ``created_at`` is
    only written on first insert because the conflict clause does not touch it.
    The connection is closed even when the statement fails.
    """
    sid = (student_id or "Guest").strip() or "Guest"
    name = (name or sid).strip() or sid
    conn = get_conn()
    try:
        conn.cursor().execute(
            """
            INSERT INTO students(student_id, name, language, created_at)
            VALUES (?, ?, ?, ?)
            ON CONFLICT(student_id) DO UPDATE SET name=excluded.name, language=excluded.language
            """,
            (sid, name, language, datetime.now().isoformat(timespec="seconds")),
        )
        conn.commit()
    finally:
        # Release the handle on both the success and the failure path.
        conn.close()
def save_quiz_attempt(student_id: str, name: str, language: str, topic: str, difficulty: str,
                      score: int, total: int, confidence_color: str, weak_areas: List[str],
                      badges: List[str], quiz: List[Dict[str, Any]], answers: Dict[str, str]):
    """Store one finished quiz attempt and make sure the student row exists.

    ``percent`` is ``score / total * 100`` rounded to two decimals; a ``total``
    of zero is treated as one so the division cannot fail. ``weak_areas``,
    ``badges``, ``quiz`` and ``answers`` are stored as JSON text. The connection
    is closed even when the insert fails.
    """
    upsert_student(student_id, name, language)
    percent = round((score / max(total, 1)) * 100, 2)
    # NOTE(review): the attempt is stored under the raw student_id, while
    # upsert_student() normalises blank ids to "Guest" - confirm this is intended.
    row = (
        student_id, name, language, topic, difficulty, score, total, percent, confidence_color,
        json.dumps(weak_areas, ensure_ascii=False), json.dumps(badges, ensure_ascii=False),
        json.dumps(quiz, ensure_ascii=False), json.dumps(answers, ensure_ascii=False),
        datetime.now().isoformat(timespec="seconds"),
    )
    conn = get_conn()
    try:
        conn.cursor().execute(
            """
            INSERT INTO quiz_attempts(student_id, student_name, language, topic, difficulty, score, total,
            percent, confidence_color, weak_areas, badges, quiz_json, answers_json, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            row,
        )
        conn.commit()
    finally:
        # Release the handle on both the success and the failure path.
        conn.close()
def save_chat_log(student_id: str, language: str, topic: str, question: str, answer: str,
                  confidence_color: str, similarity: float):
    """Append one tutor-chat exchange to the ``chat_logs`` table.

    The row is timestamped with the current local time (second precision).
    The connection is closed even when the insert fails.
    """
    row = (student_id, language, topic, question, answer, confidence_color, similarity,
           datetime.now().isoformat(timespec="seconds"))
    conn = get_conn()
    try:
        conn.cursor().execute(
            """
            INSERT INTO chat_logs(student_id, language, topic, question, answer, confidence_color, similarity, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """,
            row,
        )
        conn.commit()
    finally:
        # Release the handle on both the success and the failure path.
        conn.close()
def load_attempts_df() -> pd.DataFrame:
    """Return every row of ``quiz_attempts`` as a DataFrame, newest first.

    Rows are ordered by ``created_at`` descending. The connection is closed
    whether or not the query succeeds.
    """
    connection = get_conn()
    try:
        return pd.read_sql_query("SELECT * FROM quiz_attempts ORDER BY created_at DESC", connection)
    finally:
        connection.close()
def load_chat_df() -> pd.DataFrame:
    """Return every row of ``chat_logs`` as a DataFrame, newest first.

    Rows are ordered by ``created_at`` descending. The connection is closed
    whether or not the query succeeds.
    """
    connection = get_conn()
    try:
        return pd.read_sql_query("SELECT * FROM chat_logs ORDER BY created_at DESC", connection)
    finally:
        connection.close()
| # ===================================================== | |
| # RAG / QUESTION BANK | |
| # ===================================================== | |
def tokenize(text: str) -> List[str]:
    """Split ``text`` into lower-cased word tokens.

    A token is a maximal run of ``\\w`` characters. ``None`` or an empty string
    yields an empty list.
    """
    lowered = (text or "").lower()
    return [found.group(0) for found in re.finditer(r"\w+", lowered, flags=re.UNICODE)]
def clean_source_name(book_name: str) -> str:
    """Turn a raw source file name into the label shown to the user.

    Names containing any of the handout markers (checked case-insensitively,
    with ``_`` and ``-`` treated as spaces) collapse to ``"Professor Handouts"``.
    Otherwise a trailing ``.pdf`` is removed; an empty result becomes
    ``"Course Material"``.
    """
    raw = (book_name or "").strip()
    searchable = raw.lower().replace("_", " ").replace("-", " ")
    for marker in ("ilovepdf", "i love pdf", "merged", "professor", "teacher", "lecture", "handout", "notes"):
        if marker in searchable:
            return "Professor Handouts"
    stem = raw[:-4] if raw.lower().endswith(".pdf") else raw
    return stem if stem else "Course Material"
def expand_short_query(query: str) -> str:
    """Replace a bare abbreviation query with a bilingual keyword expansion.

    Only a query that, once stripped and lower-cased, is exactly one of the
    known abbreviations is expanded. Any other query is returned stripped but
    otherwise unchanged; ``None`` becomes an empty string.
    """
    stripped = (query or "").strip()
    known_abbreviations = {
        "mri": "MRI magnetic resonance imaging resonancia magnética RM neuroimaging brain scan",
        "rm": "RM MRI resonancia magnética magnetic resonance imaging neuroimaging brain scan",
        "ct": "CT computed tomography tomografía computarizada TC brain scan",
        "tc": "TC CT tomografía computarizada computed tomography brain scan",
        "csf": "CSF cerebrospinal fluid LCR líquido cefalorraquídeo",
        "lcr": "LCR líquido cefalorraquídeo CSF cerebrospinal fluid",
        "eeg": "EEG electroencephalography electroencefalograma epilepsy seizure crisis",
    }
    return known_abbreviations.get(stripped.lower(), stripped)
@st.cache_resource(show_spinner=False)
def _load_rag_resources_cached():
    """Read the build artefacts and construct the retrieval objects.

    Cached with ``st.cache_resource`` so the pickles, the embedding matrix, the
    BM25 index and the sentence-embedding model are built once and then reused
    instead of being rebuilt on every search. The returned objects are shared,
    so callers must not mutate them.
    """
    # NOTE(security): pickle.load can execute code embedded in the file. This is
    # only acceptable because these are build artefacts shipped with the app;
    # never point these paths at user-supplied files.
    with open(CHUNKS_PATH, "rb") as f:
        chunks = pickle.load(f)
    with open(TOKENS_PATH, "rb") as f:
        tokenized_chunks = pickle.load(f)
    embeddings = np.load(EMBED_PATH)
    with open(CONFIG_PATH, "r", encoding="utf-8") as f:
        cfg = json.load(f)
    bm25 = BM25Okapi(tokenized_chunks)
    embed_model = SentenceTransformer(cfg["embedding_model"])
    return chunks, embeddings, bm25, embed_model


def load_rag_resources():
    """Return ``(chunks, embeddings, bm25, embed_model, error)``.

    On success ``error`` is ``None``. When a build file is missing, or the
    artefacts cannot be read, the four resources are ``None`` and ``error`` is a
    human-readable message. The missing-file check runs on every call (it is
    cheap and not cached), so files added later are picked up; the expensive
    loading is delegated to the cached helper.
    """
    missing = [p for p in [CHUNKS_PATH, TOKENS_PATH, EMBED_PATH, CONFIG_PATH] if not os.path.exists(p)]
    if missing:
        return None, None, None, None, f"Missing course build files: {', '.join(missing)}"
    try:
        chunks, embeddings, bm25, embed_model = _load_rag_resources_cached()
    except (OSError, pickle.UnpicklingError, ValueError, KeyError) as exc:
        # Corrupt pickle/JSON, unreadable file, or config without "embedding_model":
        # report through the existing error slot rather than crashing the page.
        return None, None, None, None, f"Could not load course build files: {exc}"
    return chunks, embeddings, bm25, embed_model, None
def get_client():
    """Build an OpenAI client from the ``OPENAI_API_KEY`` environment variable.

    Returns ``None`` when the variable is unset or empty, so callers can show a
    configuration hint instead of failing.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    return OpenAI(api_key=api_key) if api_key else None
def load_question_bank() -> List[Dict[str, Any]]:
    """Load the past-exam question bank from ``QUESTION_BANK_FILE``.

    Returns an empty list when the file is missing, unreadable, not valid
    JSON/UTF-8, or does not contain a top-level JSON array. Entries that are
    not JSON objects are dropped, because callers read each entry with
    ``.get(...)``.
    """
    try:
        with open(QUESTION_BANK_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return []
    if not isinstance(data, list):
        return []
    return [entry for entry in data if isinstance(entry, dict)]
# Keyword table for detect_topic(); topics are tested in this order and the
# first topic with a matching keyword wins. Keywords are lower-case stems in
# English and Spanish.
_TOPIC_KEYWORDS = {
    "Stroke / Cerebrovascular": ["stroke", "ictus", "acm", "mca", "reperfusion", "trombol", "carótida", "hemipares", "afasia", "aspects", "vascular"],
    "Epilepsy": ["epile", "seizure", "crisis", "convuls", "eeg", "antiepil", "valpro", "levetiracetam"],
    "Headache": ["headache", "cefalea", "migraine", "migraña", "racimos", "trigémino", "cluster"],
    "Multiple Sclerosis / Demyelination": ["multiple sclerosis", "esclerosis", "desmiel", "nmosd", "neuromielitis", "lcr", "oligoclon"],
    "Parkinson / Movement Disorders": ["parkinson", "temblor", "bradicinesia", "levodopa", "diston", "movimiento", "supranuclear", "multisist"],
    "Dementia": ["dementia", "demencia", "alzheimer", "cognit", "memoria", "alucinaciones", "lewy"],
    "Neuropathy / Neuromuscular": ["neurop", "miasten", "myasthen", "guillain", "ela", "motoneur", "fascicul", "miopat"],
    "Neuroanatomy / Topography": ["topograf", "localiza", "lesion", "lesión", "médula", "tronco", "arteria", "quiasma", "reflejo", "sensibilidad"],
}


def _topic_keyword_in_text(keyword: str, text: str) -> bool:
    """Tell whether ``keyword`` occurs in the already lower-cased ``text``."""
    if len(keyword) <= 3:
        # Short acronyms ("ela", "acm", "mca", "eeg", "lcr") must be whole words;
        # as plain substrings they fire inside ordinary words such as "related".
        return re.search(r"\b" + re.escape(keyword) + r"\b", text) is not None
    # Longer entries are word stems and are meant to match as substrings.
    return keyword in text


def detect_topic(text: str) -> str:
    """Classify free text into one of the quiz topics by keyword lookup.

    Matching is case-insensitive. Topics are tried in table order and the first
    one with a matching keyword is returned; text that matches nothing
    (including ``None`` or an empty string) is ``"General Neurology"``.
    """
    lowered = (text or "").lower()
    for topic, keywords in _TOPIC_KEYWORDS.items():
        if any(_topic_keyword_in_text(keyword, lowered) for keyword in keywords):
            return topic
    return "General Neurology"
def _question_search_text(entry: Dict[str, Any]) -> str:
    """Join a bank entry's question and option texts into one searchable string."""
    parts = [str(entry.get("question") or "")]
    options = entry.get("options") or []
    if isinstance(options, dict):
        # {"A": "...", "B": "..."} style: only the option texts matter here.
        options = list(options.values())
    elif isinstance(options, str):
        options = [options]
    elif not isinstance(options, (list, tuple)):
        options = []
    for option in options:
        if isinstance(option, dict):
            parts.append(str(option.get("text") or ""))
        elif option is not None:
            # Options may also be plain strings such as "A. ...".
            parts.append(str(option))
    return " ".join(parts)


def filter_question_examples(topic: str, limit: int = 8) -> List[Dict[str, Any]]:
    """Pick up to ``limit`` question-bank entries whose detected topic is ``topic``.

    The topic of an entry is detected from its question plus option texts.
    Options given as dictionaries (``{"text": ...}``) or as plain strings are
    both supported, and entries that are not dictionaries are ignored when
    matching. When nothing matches, the first ``limit`` entries of the bank are
    returned instead so the caller still gets style examples.
    """
    bank = load_question_bank()
    matches = [
        entry for entry in bank
        if isinstance(entry, dict) and detect_topic(_question_search_text(entry)) == topic
    ]
    if not matches:
        matches = bank[:limit]
    return matches[:limit]
def search_hybrid(query: str, final_k: int = 6) -> Tuple[List[Dict[str, Any]], Optional[str]]:
    """Retrieve the ``final_k`` best course chunks for ``query``.

    BM25 picks a shortlist of 40 chunks; each is then scored as
    ``dense similarity + min(bm25 / 10, 0.20) + 0.35 if it is a professor handout``.
    Returns ``(records, None)`` on success or ``([], error)`` when the retrieval
    resources could not be loaded. Each record is a copy of the stored chunk
    with ``book`` cleaned and ``similarity_score``, ``bm25_score`` and
    ``final_score`` added.
    """
    chunks, embeddings, bm25, embed_model, err = load_rag_resources()
    if err:
        return [], err

    expanded_query = expand_short_query(query)
    bm25_scores = bm25.get_scores(tokenize(expanded_query))
    # Lexical shortlist, best BM25 score first.
    shortlist_idx = np.argsort(bm25_scores)[::-1][:40]
    shortlist_emb = embeddings[shortlist_idx]
    query_vector = embed_model.encode([expanded_query], normalize_embeddings=True).astype("float32")[0]
    dense_scores = shortlist_emb @ query_vector

    def score_chunk(idx, dense_score) -> Dict[str, Any]:
        record = chunks[int(idx)].copy()
        book = clean_source_name(record.get("book", ""))
        lexical = float(bm25_scores[idx])
        # The lexical contribution is capped; professor handouts get a fixed boost.
        boost = 0.35 if book == "Professor Handouts" else 0.0
        record["book"] = book
        record["similarity_score"] = float(dense_score)
        record["bm25_score"] = lexical
        record["final_score"] = float(dense_score) + min(lexical / 10.0, 0.20) + boost
        return record

    ranked = [score_chunk(idx, dense_score) for idx, dense_score in zip(shortlist_idx, dense_scores)]
    ranked.sort(key=lambda record: record.get("final_score", 0), reverse=True)
    return ranked[:final_k], None
def build_context(records: List[Dict[str, Any]]) -> str:
    """Render retrieved records as numbered ``[Source n]`` blocks for a prompt.

    Each block lists book, section, page range, similarity (three decimals) and
    the first 2500 characters of the chunk text. Blocks are separated by a blank
    line; an empty ``records`` list yields an empty string.
    """
    def render(position: int, rec: Dict[str, Any]) -> str:
        block_lines = [
            f"[Source {position}]",
            f"Book: {rec.get('book','Course Material')}",
            f"Section: {rec.get('section_title','')}",
            f"Pages: {rec.get('page_start','')}-{rec.get('page_end','')}",
            f"Similarity: {rec.get('similarity_score', 0):.3f}",
            "Text:",
            f"{rec.get('text','')[:2500]}",
        ]
        return "\n".join(block_lines)

    return "\n\n".join(render(position, rec) for position, rec in enumerate(records, 1))
def confidence_from_similarity(similarity: float) -> str:
    """Map a similarity score to a traffic-light confidence colour.

    ``>= 0.58`` is ``"green"``, ``>= 0.42`` is ``"orange"``, anything lower is
    ``"red"``.
    """
    for floor, colour in ((0.58, "green"), (0.42, "orange")):
        if similarity >= floor:
            return colour
    return "red"
def badges_for_student(student_id: str) -> List[str]:
    """Compute the badges a student has earned from stored quiz attempts.

    Per-topic badges (counted within one topic):
    - Bronze: at least 2 quizzes scored 70% or above
    - Silver: at least 3 quizzes scored 80% or above
    - Gold: at least 5 quizzes scored 90% or above

    Overall badges:
    - Consistent Learner: at least 10 quizzes completed
    - Neurology Master: at least 5 quizzes and an average of 85% or above

    Returns the badge labels sorted; an unknown student or an empty attempts
    table gives an empty list.
    """
    attempts = load_attempts_df()
    if attempts.empty:
        return []
    mine = attempts[attempts["student_id"] == student_id].copy()
    if mine.empty:
        return []

    # (minimum percent, minimum number of such quizzes, icon, tier name)
    tiers = ((70, 2, "🥉", "Bronze"), (80, 3, "🥈", "Silver"), (90, 5, "🥇", "Gold"))
    earned = set()
    for topic in TOPICS:
        percents = mine.loc[mine["topic"] == topic, "percent"]
        # Badge labels use only the part of the topic name before " /".
        label = topic.split(" /")[0]
        for min_percent, min_quizzes, icon, tier in tiers:
            if int((percents >= min_percent).sum()) >= min_quizzes:
                earned.add(f"{icon} {label} {tier}")

    quiz_count = len(mine)
    if quiz_count >= 10:
        earned.add("📘 Consistent Learner")
    if quiz_count >= 5 and float(mine["percent"].mean()) >= 85:
        earned.add("🏆 Neurology Master")
    return sorted(earned)
| # ===================================================== | |
| # AI FUNCTIONS | |
| # ===================================================== | |
def safe_json_from_text(text: str):
    """Parse JSON out of a model reply that may be wrapped in a code fence.

    Leading ```` ```json ````/```` ``` ```` and trailing ```` ``` ```` markers
    are removed. If the text contains a ``[`` followed later by a ``]``, only
    the span from the first ``[`` to the last ``]`` is parsed. Raises whatever
    ``json.loads`` raises for invalid input.
    """
    cleaned = text.strip()
    for fence in (r"^```json", r"^```", r"```$"):
        cleaned = re.sub(fence, "", cleaned, flags=re.I).strip()
    first = cleaned.find("[")
    last = cleaned.rfind("]")
    if first != -1 and last > first:
        cleaned = cleaned[first:last + 1]
    return json.loads(cleaned)
def normalize_mcq_option(opt: Any, index: int) -> str:
    """Convert one MCQ option into a display string of the form ``'A. Text'``.

    ``opt`` may be a plain string or a dictionary with a ``letter`` and one of
    ``text`` / ``option`` / ``value``. ``index`` is the zero-based position of
    the option and supplies the letter whenever the option carries no usable one.

    - A string already labelled ``'a) text'`` or ``'A. text'`` keeps its letter,
      upper-cased. Only ``.`` and ``)`` count as label separators.
    - A dictionary letter is used only when it is a single A-Z letter; ``None``,
      digits and other symbols fall back to the positional letter.
    - Dictionary text that already starts with the same label is not prefixed twice.
    """
    letter = chr(65 + index)
    labelled = re.compile(r"^([A-Ea-e])\s*[.)]\s*(.+)$")

    if isinstance(opt, dict):
        raw_letter = opt.get("letter")
        opt_letter = "" if raw_letter is None else str(raw_letter).strip().upper()[:1]
        if not ("A" <= opt_letter <= "Z"):
            opt_letter = letter
        text = ""
        for key in ("text", "option", "value"):
            candidate = opt.get(key)
            if candidate is not None and str(candidate).strip():
                text = str(candidate).strip()
                break
        if not text:
            # Nothing usable inside: show the raw dictionary rather than an empty option.
            text = str(opt)
        already = labelled.match(text)
        if already and already.group(1).upper() == opt_letter:
            text = already.group(2).strip()
        return f"{opt_letter}. {text}"

    text = str(opt).strip()
    # If the model already gives 'A. text' or 'A) text', keep it clean.
    m = labelled.match(text)
    if m:
        return f"{m.group(1).upper()}. {m.group(2).strip()}"
    return f"{letter}. {text}"
def _clean_mcq_items(raw_items: Any, topic: str, room: int) -> List[Dict[str, Any]]:
    """Validate raw model output and return at most ``room`` well-formed MCQs.

    Items that are not dictionaries, have no question text, or do not provide
    five options are skipped instead of aborting the whole quiz.
    """
    cleaned: List[Dict[str, Any]] = []
    if not isinstance(raw_items, list):
        return cleaned
    for item in raw_items:
        if len(cleaned) >= room:
            break
        if not isinstance(item, dict):
            continue
        opts = item.get("options", [])
        if isinstance(opts, dict):
            # Handles {"A":"...", "B":"..."} format
            opts = [{"letter": k, "text": v} for k, v in opts.items()]
        if not isinstance(opts, list):
            continue
        formatted_opts = [normalize_mcq_option(opt, idx) for idx, opt in enumerate(opts[:5])]
        if len(formatted_opts) != 5:
            continue
        question_text = str(item.get("question", "")).strip()
        if not question_text:
            continue
        correct = str(item.get("correct_option", item.get("answer", "A"))).strip().upper()[:1]
        # NOTE(review): an unreadable answer key silently becomes "A", which can
        # grade a question against the wrong option - consider skipping instead.
        if correct not in ("A", "B", "C", "D", "E"):
            correct = "A"
        explanation = str(item.get("explanation") or "").strip()
        if not explanation:
            explanation = "Review the selected topic and compare the clinical features carefully."
        cleaned.append({
            "question": question_text,
            "options": formatted_opts,
            "correct_option": correct,
            "explanation": explanation,
            "subtopic": str(item.get("subtopic", topic)).strip() or topic,
        })
    return cleaned


def generate_mcqs(topic: str, difficulty: str, n_questions: int, language: str) -> Tuple[List[Dict[str, Any]], str]:
    """Generate ``n_questions`` five-option MCQs for ``topic`` with the chat model.

    Returns ``(questions, message)``. ``message`` is empty on full success and
    otherwise explains what happened. Demo questions from ``fallback_mcqs`` are
    returned when the API key is missing, the first request fails, or nothing
    usable comes back. If the first reply yields too few valid questions, one
    top-up request is made; a failure of that top-up keeps the questions already
    obtained instead of discarding them.
    """
    client = get_client()
    if client is None:
        # Checked before retrieval so a missing key does not load the search stack.
        return fallback_mcqs(topic, n_questions, language), "OPENAI_API_KEY missing. Showing demo questions."

    # A retrieval error is deliberately not fatal here: the prompt is then built
    # with an empty course context.
    records, _retrieval_error = search_hybrid(topic + " neurology PMQSN exam questions", final_k=8)
    context = build_context(records)
    examples = filter_question_examples(topic, limit=6)
    examples_text = json.dumps(examples, ensure_ascii=False)[:6000]
    lang_instruction = "Write everything in English." if language == "English" else "Escribe todo en español."
    requested_from_model = n_questions + 2  # ask for a few extra, then keep exactly the selected number
    prompt = f"""
You are BrainChat, an exam-focused neurology tutor.
Generate at least {requested_from_model} MCQ questions for the topic: {topic}.
The final app will keep exactly {n_questions} questions, so do not return fewer than {n_questions}.
Difficulty: {difficulty}.
{lang_instruction}
Rules:
- Output ONLY a valid JSON array.
- Each item must have: question, options, correct_option, explanation, subtopic.
- options must be exactly 5 options labelled A, B, C, D, E.
- The options may be strings only, for example: ["A. ...", "B. ...", "C. ...", "D. ...", "E. ..."].
- Only one correct answer.
- Avoid generic demo questions such as "Option A".
- The style should follow PMQSN neurology exam questions.
- Use the course context as the main knowledge source.
- Do not mention JSON, files, or internal retrieval.
Past exam style examples:
{examples_text}
Course context:
{context}
"""
    try:
        resp = client.chat.completions.create(
            model=OPENAI_MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.30,
        )
        clean = _clean_mcq_items(safe_json_from_text(resp.choices[0].message.content or "[]"), topic, n_questions)
    except Exception as e:
        return fallback_mcqs(topic, n_questions, language), f"AI generation failed: {e}"

    if len(clean) < n_questions:
        # Try one more small request instead of showing generic fallback questions.
        missing = n_questions - len(clean)
        retry_prompt = f"""
Generate exactly {missing} additional PMQSN-style neurology MCQs for topic: {topic}.
{lang_instruction}
Return ONLY valid JSON array. Each question must have exactly 5 string options labelled A-E, one correct_option, explanation, and subtopic.
Do not use generic placeholders.
"""
        try:
            retry = client.chat.completions.create(
                model=OPENAI_MODEL,
                messages=[{"role": "user", "content": retry_prompt}],
                temperature=0.30,
            )
            clean.extend(_clean_mcq_items(safe_json_from_text(retry.choices[0].message.content or "[]"), topic, missing))
        except Exception:
            # Best-effort top-up: keep the valid questions from the first reply.
            # The shortfall is reported through the returned message below.
            pass

    if clean:
        return clean[:n_questions], "" if len(clean) >= n_questions else "Generated fewer questions than requested."
    return fallback_mcqs(topic, n_questions, language), "Could not generate AI quiz. Showing demo questions."
def fallback_mcqs(topic: str, n: int, language: str) -> List[Dict[str, Any]]:
    """Build ``n`` identical placeholder questions for ``topic``.

    Used when real questions cannot be generated. The question and explanation
    are in Spanish when ``language`` is ``"Spanish"`` and in English otherwise;
    the correct option is always ``"A"``. Every returned question is an
    independent dictionary with its own options list.
    """
    spanish = language == "Spanish"
    question = (
        f"Pregunta de práctica sobre {topic}: ¿cuál opción es más correcta?"
        if spanish
        else f"Practice question on {topic}: which option is most correct?"
    )
    explanation = (
        "Esta es una pregunta de demostración. Configure OPENAI_API_KEY y los materiales del curso para generar preguntas reales."
        if spanish
        else "This is a demo question. Configure OPENAI_API_KEY and course materials to generate real questions."
    )
    demo: List[Dict[str, Any]] = []
    for _ in range(n):
        demo.append({
            "question": question,
            "options": ["A. Option A", "B. Option B", "C. Option C", "D. Option D", "E. Option E"],
            "correct_option": "A",
            "explanation": explanation,
            "subtopic": topic,
        })
    return demo
def answer_tutor_question(question: str, topic: str, language: str) -> Tuple[str, str, float]:
    """Answer a student question with the chat model, grounded in course chunks.

    Returns ``(answer, confidence_color, similarity)``. ``similarity`` is the
    best dense similarity among the retrieved chunks and determines the colour.
    Failures are returned as a message with colour ``"red"`` rather than raised:
    a retrieval error (similarity ``0.0``), a missing API key, or a failed
    model request.
    """
    records, err = search_hybrid(question + " " + topic, final_k=6)
    if err:
        return err, "red", 0.0
    similarity = max((r.get("similarity_score", 0) for r in records), default=0.0)
    color = confidence_from_similarity(similarity)
    client = get_client()
    if client is None:
        return "OPENAI_API_KEY is missing. Please add it in Hugging Face Space Secrets.", "red", similarity
    context = build_context(records)
    lang_instruction = "Answer fully in English." if language == "English" else "Responde completamente en español."
    prompt = f"""
You are BrainChat, a neurology tutor. {lang_instruction}
Use the course context first. Explain clearly and simply.
At the end, add a short revision tip.
Topic: {topic}
Question: {question}
Course context:
{context}
"""
    try:
        resp = client.chat.completions.create(
            model=OPENAI_MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.25,
        )
        answer = resp.choices[0].message.content or ""
    except Exception as exc:
        # UI boundary: report the failure in the chat instead of crashing the
        # page, mirroring how generate_mcqs() handles request errors.
        return f"AI answer failed: {exc}", "red", similarity
    return answer, color, similarity
| # ===================================================== | |
| # REPORTS | |
| # ===================================================== | |
def html_report_student(student_id: str, name: str, language: str) -> str:
    """Build a standalone HTML learning report for one student.

    The report shows the student's name and id, the average score, the earned
    badges, a fixed explanation of the colour/badge rules and one table row per
    stored quiz attempt. The title is English when ``language`` is
    ``"English"`` and Spanish otherwise. Text values taken from the database
    are HTML-escaped.
    """
    attempts = load_attempts_df()
    if attempts.empty:
        sdf = pd.DataFrame()
    else:
        sdf = attempts[attempts["student_id"] == student_id]
    title = "Learning Report" if language == "English" else "Informe de aprendizaje"

    if sdf.empty:
        rows = "<tr><td colspan='6'>No attempts yet.</td></tr>"
    else:
        rows = "".join(
            f"<tr><td>{html.escape(str(r['created_at']))}</td>"
            f"<td>{html.escape(str(r['topic']))}</td>"
            f"<td>{html.escape(str(r['difficulty']))}</td>"
            f"<td>{r['score']}/{r['total']}</td>"
            f"<td>{r['percent']:.1f}%</td>"
            f"<td>{html.escape(str(r['confidence_color']))}</td></tr>"
            for _, r in sdf.iterrows()
        )
    badges = ", ".join(badges_for_student(student_id)) or "None"
    avg = 0 if sdf.empty else sdf["percent"].mean()
    return f"""
<!doctype html><html><head><meta charset='utf-8'><title>{title}</title>
<style>body{{font-family:Arial;margin:30px;line-height:1.5}} .card{{border:1px solid #ddd;border-radius:12px;padding:18px;margin:12px 0}} table{{border-collapse:collapse;width:100%}} th,td{{border:1px solid #ddd;padding:8px;text-align:left}} th{{background:#f3f3f3}} .green{{color:#137333}} .orange{{color:#b06000}} .red{{color:#b00020}}</style></head><body>
<h1>{title}</h1>
<div class='card'><b>Student:</b> {html.escape(name)}<br><b>ID:</b> {html.escape(student_id)}<br><b>Generated:</b> {datetime.now().strftime('%Y-%m-%d %H:%M')}</div>
<div class='card'><h2>Summary</h2><p><b>Average score:</b> {avg:.1f}%</p><p><b>Badges:</b> {html.escape(badges)}</p></div>
<div class='card'><h2>How progress is measured</h2>
<p><span class='green'><b>Green</b></span>: high confidence / strong performance.</p>
<p><span class='orange'><b>Orange</b></span>: medium confidence / needs revision.</p>
<p><span class='red'><b>Red</b></span>: low confidence / weak support or low performance.</p>
<p><b>Similarity</b> is from 0 to 1 and shows how closely course material matched the question.</p>
<p><b>Badges</b> are earned from quiz marks only: Bronze = 70% or above in 2 quizzes of the same topic; Silver = 80% or above in 3 quizzes; Gold = 90% or above in 5 quizzes. General badges are awarded for consistency and strong overall performance.</p></div>
<div class='card'><h2>Quiz attempts</h2><table><tr><th>Date</th><th>Topic</th><th>Difficulty</th><th>Score</th><th>Percent</th><th>Confidence</th></tr>{rows}</table></div>
</body></html>
"""
def html_report_teacher() -> str:
    """Build a standalone HTML class report from all stored quiz attempts.

    Contains a per-topic summary (attempt count and average percent) followed
    by one row per attempt. With no attempts stored, the body is a single
    "no data" paragraph. Text values taken from the database are HTML-escaped.
    """
    attempts = load_attempts_df()
    if attempts.empty:
        body = "<p>No student data available yet.</p>"
    else:
        summary = attempts.groupby("topic").agg(attempts=("id", "count"), avg_score=("percent", "mean")).reset_index()
        topic_rows = "".join(
            f"<tr><td>{html.escape(str(r['topic']))}</td><td>{int(r['attempts'])}</td><td>{r['avg_score']:.1f}%</td></tr>"
            for _, r in summary.iterrows()
        )
        attempt_rows = "".join(
            f"<tr><td>{html.escape(str(r['created_at']))}</td>"
            f"<td>{html.escape(str(r['student_name']))}</td>"
            f"<td>{html.escape(str(r['topic']))}</td>"
            f"<td>{r['percent']:.1f}%</td>"
            f"<td>{html.escape(str(r['weak_areas']))}</td></tr>"
            for _, r in attempts.iterrows()
        )
        body = (
            "<h2>Topic summary</h2><table><tr><th>Topic</th><th>Attempts</th><th>Average score</th></tr>"
            + topic_rows
            + "</table><h2>Student attempts</h2><table><tr><th>Date</th><th>Student</th><th>Topic</th><th>Score</th><th>Weak areas</th></tr>"
            + attempt_rows
            + "</table>"
        )
    return f"""
<!doctype html><html><head><meta charset='utf-8'><title>Teacher Class Report</title>
<style>body{{font-family:Arial;margin:30px;line-height:1.5}} table{{border-collapse:collapse;width:100%}} th,td{{border:1px solid #ddd;padding:8px;text-align:left}} th{{background:#f3f3f3}}</style></head><body>
<h1>Teacher Class Report</h1><p>Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}</p>{body}</body></html>
"""
| # ===================================================== | |
| # UI HELPERS | |
| # ===================================================== | |
def t(key: str) -> str:
    """Return the interface string for ``key`` in the session's language.

    The language is read from ``st.session_state["language"]`` (default
    ``"English"``). Lookup falls back step by step: an unknown language uses the
    English table, a key missing from the active language uses the English
    text, and a key missing everywhere is returned unchanged.
    """
    lang = st.session_state.get("language", "English")
    english = TRANSLATIONS["English"]
    # An unexpected session value must not raise KeyError while rendering.
    table = TRANSLATIONS.get(lang) or english
    if key in table:
        return table[key]
    return english.get(key, key)
def render_header():
    """Draw the page header: logo (or a brain emoji) beside the title and subtitle.

    The logo column is one fifth the width of the title column. The image at
    ``LOGO_FILE`` is shown when that file exists.
    """
    logo_col, title_col = st.columns([1, 5])
    with logo_col:
        if not os.path.exists(LOGO_FILE):
            st.markdown("# 🧠")
        else:
            st.image(LOGO_FILE, width=90)
    with title_col:
        st.title(APP_TITLE)
        st.caption(t("app_subtitle"))
def confidence_badge(color: str):
    """Render a bold "Confidence:" line with a coloured-dot label.

    ``"green"``, ``"orange"`` and ``"red"`` map to their emoji labels; any other
    value is shown as given.
    """
    labels = {"green": "🟢 Green", "orange": "🟠 Orange", "red": "🔴 Red"}
    label = labels[color] if color in labels else color
    st.markdown(f"**Confidence:** {label}")
def format_option(opt: Any) -> str:
    """Return a display string for an option given as a string or a dictionary.

    A dictionary with both ``letter`` and ``text`` becomes ``"letter. text"``;
    with only ``text`` it becomes that text; without usable text the dictionary
    itself is stringified. Anything that is not a dictionary is passed through
    ``str``.
    """
    if not isinstance(opt, dict):
        return str(opt)
    letter = str(opt.get("letter", "")).strip()
    text = str(opt.get("text", "")).strip()
    if not text:
        return str(opt)
    return f"{letter}. {text}" if letter else text
| # ===================================================== | |
| # STUDENT MODE | |
| # ===================================================== | |
def _quiz_option_letter(opt: Any, displayed: Optional[str]) -> str:
    """Return the upper-case answer letter represented by one quiz option.

    A dict option's own ``letter`` field wins; otherwise the first character
    of the text shown to the student is used (options such as "A. ...").
    Returns "" when no letter can be determined.
    """
    if isinstance(opt, dict):
        raw = opt.get("letter")
        letter = "" if raw is None else str(raw).strip()
        if letter:
            return letter[0].upper()
    return (displayed or "").strip()[:1].upper()


def _student_chat_tab(student_id: str, topic: str, language: str) -> None:
    """Render the tutor chat tab: choose an activity, send it, show the answer."""
    english = language == "English"
    st.subheader(t("chat"))
    if english:
        activity_options = [
            "Free question — ask any neurology question",
            "Explanation for selected topic",
            "Flashcards for selected topic",
            "Case study for selected topic",
        ]
    else:
        activity_options = [
            "Pregunta libre — cualquier pregunta de neurología",
            "Explicación del tema seleccionado",
            "Tarjetas de estudio del tema seleccionado",
            "Caso clínico del tema seleccionado",
        ]
    tutor_activity = st.selectbox(
        "Tutor activity" if english else "Actividad del tutor",
        activity_options,
    )
    # Both lists share the same order, so the position identifies the activity
    # regardless of the interface language (0 = free question).
    activity_index = activity_options.index(tutor_activity)

    if activity_index == 0:
        q = st.text_area(t("ask_question"), height=120)
    else:
        q = ""
        st.info(
            f"This activity will be generated for the selected topic: **{topic}**."
            if english
            else f"Esta actividad se generará para el tema seleccionado: **{topic}**."
        )

    if not st.button(t("send"), key="ask_btn"):
        return

    if activity_index == 1:
        q_to_send = f"Explain this selected topic clearly for a medical student: {topic}"
    elif activity_index == 2:
        q_to_send = f"Create 8 flashcards with question and answer for this selected topic: {topic}"
    elif activity_index == 3:
        q_to_send = f"Create one clinical case study with questions, answer, and explanation for this selected topic: {topic}"
    else:
        q_to_send = (q or "").strip()

    if not q_to_send:
        st.warning("Please write a question." if english else "Por favor escribe una pregunta.")
        return

    with st.spinner("BrainChat is preparing the answer..."):
        ans, color, sim = answer_tutor_question(q_to_send, topic, language)
    save_chat_log(student_id or "Guest", language, topic, q_to_send, ans, color, sim)
    confidence_badge(color)
    st.caption(f"Similarity: {sim:.2f}")
    st.markdown(ans)


def _student_quiz_tab(student_id: str, student_name: str, topic: str, language: str) -> None:
    """Render the quiz tab: generate a quiz, collect answers, grade and save."""
    english = language == "English"
    st.subheader(t("quiz"))
    c1, c2 = st.columns(2)
    with c1:
        difficulty = st.selectbox(t("difficulty"), ["Easy", "Medium", "Exam level"])
    with c2:
        n_questions = st.selectbox(t("num_questions"), [3, 5, 10, 15, 20], index=1)

    if st.button(t("start_quiz"), key="gen_quiz"):
        with st.spinner("Generating topic-based MCQ quiz..."):
            quiz, warn = generate_mcqs(topic, difficulty, n_questions, language)
        # The quiz lives in session state so it survives the rerun triggered
        # by every radio click and by the submit button.
        st.session_state["current_quiz"] = quiz
        st.session_state["quiz_topic"] = topic
        st.session_state["quiz_difficulty"] = difficulty
        if warn:
            st.warning(warn)

    quiz = st.session_state.get("current_quiz", [])
    if not quiz:
        return

    answers = {}
    for i, item in enumerate(quiz, 1):
        st.markdown(f"### Q{i}. {item['question']}")
        options = list(item["options"])
        options_display = [format_option(opt) for opt in options]
        choice = st.radio(
            "Select answer" if english else "Selecciona la respuesta",
            options_display,
            key=f"q_{i}",
        )
        # Map the chosen label back to its source option so a dict's explicit
        # letter is used instead of guessing from the first display character.
        picked = options[options_display.index(choice)] if choice in options_display else None
        answers[str(i)] = _quiz_option_letter(picked, choice)

    if not st.button(t("submit_quiz"), key="submit_quiz"):
        return

    score = 0
    weak = []
    st.markdown("## Results" if english else "## Resultados")
    for i, item in enumerate(quiz, 1):
        # Tolerate None / lower-case / padded values in the stored answer key;
        # a missing key keeps the original default of "A".
        correct = str(item.get("correct_option") or "A").strip()[:1].upper()
        selected = answers.get(str(i), "")
        ok = selected == correct
        if ok:
            score += 1
        else:
            weak.append(item.get("subtopic", topic))
        st.markdown(f"**Q{i}: {'✅' if ok else '❌'} Selected: {selected} | Correct: {correct}**")
        st.write(item.get("explanation", ""))

    percent = score / max(len(quiz), 1) * 100
    color = "green" if percent >= 70 else "orange" if percent >= 45 else "red"
    weak_areas = sorted(set(weak))
    badges = badges_for_student(student_id or "Guest")
    save_quiz_attempt(
        student_id or "Guest",
        student_name or student_id or "Guest",
        language,
        st.session_state.get("quiz_topic", topic),
        st.session_state.get("quiz_difficulty", difficulty),
        score,
        len(quiz),
        color,
        weak_areas,
        badges,
        quiz,
        answers,
    )
    st.success(f"{t('score')}: {score}/{len(quiz)} ({percent:.1f}%). {t('saved')}")
    confidence_badge(color)
    if weak:
        st.warning(f"{t('weak_areas')}: {', '.join(weak_areas)}")


def _student_report_tab(student_id: str, student_name: str, language: str) -> None:
    """Render the personal report tab: metrics, attempt table, chart, download."""
    english = language == "English"
    st.subheader(t("report"))
    sid = student_id or "Guest"
    df = load_attempts_df()
    sdf = df[df["student_id"] == sid] if not df.empty else pd.DataFrame()
    if sdf.empty:
        st.info(t("no_data"))
        return

    st.metric("Average score" if english else "Puntuación media", f"{sdf['percent'].mean():.1f}%")
    st.dataframe(
        sdf[["created_at", "topic", "difficulty", "score", "total", "percent", "confidence_color"]],
        use_container_width=True,
    )
    fig = px.line(
        sdf.sort_values("created_at"),
        x="created_at",
        y="percent",
        color="topic",
        markers=True,
        title="Progress over time" if english else "Progreso en el tiempo",
    )
    st.plotly_chart(fig, use_container_width=True)
    report_html = html_report_student(sid, student_name or sid, language)
    st.download_button(
        t("download_html"),
        data=report_html,
        file_name=f"brainchat_report_{sid}.html",
        mime="text/html",
    )


def student_mode():
    """Render the student view: sidebar identity/topic plus chat, quiz, report tabs.

    The student id typed in the sidebar doubles as the display name; an empty
    id is treated as "Guest". Both values are mirrored into
    ``st.session_state`` ("student_id", "student_name").

    Compared with the earlier single-function version:
      * quiz answers are graded from the option's own ``letter`` when the
        option is a dict, falling back to the first character of the label;
      * the stored ``correct_option`` is normalised (None, whitespace and
        lower case are tolerated) before comparison;
      * each tab is rendered by its own private helper.
    """
    language = st.session_state.get("language", "English")
    st.info(t("student_tip"))
    with st.sidebar:
        student_id = st.text_input(t("student_id"), value=st.session_state.get("student_id", ""))
        topic = st.selectbox(t("topic"), TOPICS)
    st.session_state["student_id"] = student_id
    student_name = student_id or "Guest"
    st.session_state["student_name"] = student_name

    tab_chat, tab_quiz, tab_report = st.tabs([t("chat"), t("quiz"), t("report")])
    with tab_chat:
        _student_chat_tab(student_id, topic, language)
    with tab_quiz:
        _student_quiz_tab(student_id, student_name, topic, language)
    with tab_report:
        _student_report_tab(student_id, student_name, language)
| # ===================================================== | |
| # TEACHER MODE | |
| # ===================================================== | |
def _teacher_chat_logs(chat_df) -> None:
    """Show the tutor chat log table (or a placeholder) inside an expander."""
    with st.expander("Tutor chat logs"):
        if chat_df.empty:
            st.info("No chat logs yet.")
        else:
            st.dataframe(
                chat_df[["created_at", "student_id", "topic", "question", "confidence_color", "similarity"]],
                use_container_width=True,
            )


def teacher_mode():
    """Render the password-protected teacher dashboard.

    Flow:
      1. Ask for the teacher password; a successful login is remembered in
         ``st.session_state["teacher_ok"]`` for the rest of the session.
      2. Show class metrics, per-topic and per-student charts, the attempt
         tables, the tutor chat logs and a downloadable HTML report.

    The password is compared with ``hmac.compare_digest`` so the check does
    not leak timing information. When no quiz attempts exist yet, chat logs
    that do exist are still shown before returning.
    """
    # Local import: only this function needs it.
    import hmac

    st.info(t("teacher_tip"))
    pwd = st.text_input(t("teacher_password"), type="password")
    login_clicked = st.button(t("login"))
    already_ok = bool(st.session_state.get("teacher_ok"))
    if not login_clicked and not already_ok:
        return

    if not already_ok:
        # Compare as bytes: compare_digest rejects non-ASCII str arguments.
        supplied = (pwd or "").encode("utf-8")
        expected = str(TEACHER_PASSWORD).encode("utf-8")
        if not hmac.compare_digest(supplied, expected):
            st.error("Incorrect password")
            return
    st.session_state["teacher_ok"] = True

    df = load_attempts_df()
    chat_df = load_chat_df()
    if df.empty:
        st.warning(t("no_data"))
        # Tutor usage can exist before the first quiz is taken.
        if not chat_df.empty:
            _teacher_chat_logs(chat_df)
        return

    c1, c2, c3, c4 = st.columns(4)
    c1.metric("Students", df["student_id"].nunique())
    c2.metric("Quiz attempts", len(df))
    c3.metric("Average score", f"{df['percent'].mean():.1f}%")
    c4.metric("Low confidence", int((df["confidence_color"] == "red").sum()))

    st.subheader("Class analytics")
    topic_summary = (
        df.groupby("topic")
        .agg(attempts=("id", "count"), avg_score=("percent", "mean"))
        .reset_index()
    )
    fig1 = px.bar(topic_summary, x="topic", y="avg_score", hover_data=["attempts"], title="Average score by topic")
    st.plotly_chart(fig1, use_container_width=True)

    student_summary = (
        df.groupby(["student_id", "student_name"])
        .agg(attempts=("id", "count"), avg_score=("percent", "mean"))
        .reset_index()
    )
    fig2 = px.bar(
        student_summary,
        x="student_name",
        y="avg_score",
        hover_data=["student_id", "attempts"],
        title="Average score by student",
    )
    st.plotly_chart(fig2, use_container_width=True)

    st.subheader("Student-wise records")
    selected_student = st.selectbox("Select student", sorted(df["student_id"].unique()))
    sdf = df[df["student_id"] == selected_student]
    st.dataframe(
        sdf[["created_at", "student_name", "topic", "difficulty", "score", "total", "percent", "weak_areas", "badges"]],
        use_container_width=True,
    )

    st.subheader("All quiz attempts")
    st.dataframe(
        df[["created_at", "student_id", "student_name", "topic", "difficulty", "score", "total", "percent", "confidence_color", "weak_areas"]],
        use_container_width=True,
    )

    _teacher_chat_logs(chat_df)

    report_html = html_report_teacher()
    st.download_button(
        "Download teacher HTML report",
        data=report_html,
        file_name="brainchat_teacher_report.html",
        mime="text/html",
    )
| # ===================================================== | |
| # MAIN | |
| # ===================================================== | |
def main():
    """Application entry point: set up storage, sidebar controls and the active view.

    The mode selector uses language-independent option values
    ("student_mode" / "teacher_mode") that are translated only for display,
    and the last chosen mode is kept in ``st.session_state["active_mode"]``.
    Translating the label or options makes Streamlit treat the radio as a new
    widget, so without this a language switch would silently send a teacher
    back to the student view.
    """
    init_db()
    if "language" not in st.session_state:
        st.session_state["language"] = "English"

    modes = ["student_mode", "teacher_mode"]
    with st.sidebar:
        st.session_state["language"] = st.radio(
            "Interface language / Idioma", ["English", "Spanish"], horizontal=True
        )
        remembered = st.session_state.get("active_mode", modes[0])
        mode = st.radio(
            t("mode"),
            modes,
            # Re-seed the widget with the remembered mode whenever it is rebuilt.
            index=modes.index(remembered) if remembered in modes else 0,
            format_func=t,
        )
        st.session_state["active_mode"] = mode

    render_header()
    with st.expander(t("explain_logic"), expanded=False):
        st.markdown(f"- {t('confidence_green')}\n- {t('confidence_orange')}\n- {t('confidence_red')}\n- {t('similarity_help')}\n- {t('badge_help')}")

    if mode == "student_mode":
        student_mode()
    else:
        teacher_mode()


if __name__ == "__main__":
    main()