import logging
import os
from typing import Any, Dict, List, Optional, Tuple

import gradio as gr
import httpx
import pandas as pd
|
|
|
|
| |
| |
| |
|
|
# Built-in demo data, used whenever no live backend snapshot is available.

# One row per tracked concept. The "status" values that appear here are
# "on_loan", "clear" and "persists".
SAMPLE_LEDGER: List[Dict[str, Any]] = [
    {
        "concept": "Recursion base case",
        "status": "on_loan",
        "confidence": 0.92,
        "source_text": "Borrowed from AI explanation",
    },
    {
        "concept": "Binary search invariants",
        "status": "clear",
        "confidence": 0.88,
        "source_text": "Explained in own words",
    },
    {
        "concept": "Gradient descent",
        "status": "persists",
        "confidence": 0.79,
        "source_text": "Needs another clearing session",
    },
    {
        "concept": "RAG retrieval",
        "status": "on_loan",
        "confidence": 0.84,
        "source_text": "Added during prompt work",
    },
]


# Per-concept counts for a heatmap view.
SAMPLE_HEATMAP: List[Dict[str, Any]] = [
    {"concept": "Recursion base case", "count": 9},
    {"concept": "Binary search invariants", "count": 4},
    {"concept": "Gradient descent", "count": 7},
]


# Static headline numbers. NOTE(review): these are hand-written and are not
# derived from SAMPLE_LEDGER, so the two can disagree.
SAMPLE_METRICS: Dict[str, int] = {
    "active": 2,
    "cleared": 1,
    "persists": 1,
    "debt_score": 67,
    "pending_sync": 0,
    "spoof_attempts": 3,
}
|
|
|
|
| |
| |
| |
|
|
def backend_snapshot(base_url: str) -> Optional[Dict[str, Any]]:
    """Fetch live data from the knowledge backend API.

    Requests ``/api/state``, ``/api/sync/status`` and
    ``/api/integrity/report`` under *base_url*, in that order, and decodes
    each response body as JSON.

    Args:
        base_url: Root URL of the backend. Trailing slashes are ignored.

    Returns:
        A dict with the keys ``"state"``, ``"sync"`` and ``"integrity"``
        holding the decoded bodies, or ``None`` when any request fails,
        answers with a 4xx/5xx status, or does not contain valid JSON.
    """
    base = base_url.rstrip("/")
    endpoints = {
        "state": "/api/state",
        "sync": "/api/sync/status",
        "integrity": "/api/integrity/report",
    }
    snapshot: Dict[str, Any] = {}
    try:
        # The client-level timeout applies to every request made through it.
        with httpx.Client(timeout=6.0) as client:
            for key, path in endpoints.items():
                response = client.get(f"{base}{path}")
                # Without this, the JSON error body of a failed request would
                # be handed to callers as if it were real backend state.
                response.raise_for_status()
                snapshot[key] = response.json()
    except (httpx.HTTPError, httpx.InvalidURL, ValueError) as exc:
        # ValueError covers undecodable / non-JSON bodies. Fetching is
        # best-effort: report the failure and let the caller fall back.
        logging.getLogger(__name__).warning(
            "Knowledge backend unavailable at %s: %s", base, exc
        )
        return None
    return snapshot
|
|
|
|
def scout_demo(pasted_text: str) -> Tuple[str, str]:
    """Simulate Scout tagging concepts from pasted text.

    Looks for a fixed list of keywords in *pasted_text*, ignoring case, and
    reports every keyword found as a detected concept. When none is found a
    single placeholder concept is reported instead.

    Args:
        pasted_text: Free-form text supplied by the user.

    Returns:
        A ``(result, status)`` pair: ``result`` is a Markdown bullet list of
        the detected concepts and ``status`` is a one-line summary. For
        empty or whitespace-only input ``result`` is ``""`` and ``status``
        asks the user to paste some text.
    """
    if not pasted_text.strip():
        return "", "Paste some text to see Scout extract concepts."

    keywords = (
        "recursion",
        "binary search",
        "gradient descent",
        "rag",
        "algorithm",
        "model",
        "training",
        "inference",
    )
    # Lowercase once; the keywords are already lowercase.
    haystack = pasted_text.lower()
    # NOTE(review): this is plain substring matching, so a short keyword such
    # as "rag" also fires inside unrelated words like "storage".
    extracted_concepts = [
        keyword.capitalize() for keyword in keywords if keyword in haystack
    ]
    if not extracted_concepts:
        extracted_concepts = ["Learning (inferred from context)"]

    count = len(extracted_concepts)
    bullets = "".join(
        f"- {concept} (confidence: ~85%)\n" for concept in extracted_concepts
    )
    result = f"**Scout detected {count} concept(s):**\n\n{bullets}"
    status = f"✅ Tagged {count} concept(s) — added to your ledger"
    return result, status
|
|
|
|
def sage_demo(concept: str, user_response: str) -> str:
    """Simulate Sage Socratic dialogue.

    The reply depends only on how many whitespace-separated words
    *user_response* contains; the content of the explanation is not
    inspected.

    Args:
        concept: Name of the concept being cleared.
        user_response: The user's explanation of the concept.

    Returns:
        A Markdown message: a prompt to enter a concept when *concept* is
        blank, an opening question when *user_response* is blank, a
        follow-up question for answers under 20 words, a second follow-up
        for answers under 40 words, and a "cleared" confirmation otherwise.
    """
    if not concept.strip():
        return "Enter a concept to start a clearing session."

    if not user_response.strip():
        return f"🦉 **Sage**: Let's talk about {concept}. Can you explain it in your own words?"

    word_count = len(user_response.split())
    if word_count < 20:
        return f"🦉 **Sage**: That's a start. But can you go deeper? What makes {concept} special or different?"
    if word_count < 40:
        return f"🦉 **Sage**: Good effort! Now, why would someone use {concept} in practice? When does it matter?"
    return f"✅ **Sage**: Excellent! You've clearly understood {concept}. This concept is now **CLEARED** in your ledger."
|
|
|
|
def get_ledger_display() -> pd.DataFrame:
    """Return the current ledger as a DataFrame for display.

    When the ``KNOWLEDGE_API_URL`` environment variable is set, the ledger
    is read from ``snapshot["state"]["debts"]`` of the backend snapshot.
    ``SAMPLE_LEDGER`` is used when the variable is unset, the backend cannot
    be reached, or the snapshot does not contain a list at that path.

    Returns:
        A frame with exactly the columns ``concept``, ``status`` and
        ``confidence``, in that order. A column that is absent from the
        ledger rows is filled with missing values, and an empty ledger
        yields an empty frame that still carries the three headers.
    """
    columns = ["concept", "status", "confidence"]

    backend_url = os.environ.get("KNOWLEDGE_API_URL", "").strip()
    snapshot = backend_snapshot(backend_url) if backend_url else None

    # This runs while the UI is being constructed, so an unexpected payload
    # shape must degrade to the sample data rather than raise.
    state = snapshot.get("state") if isinstance(snapshot, dict) else None
    debts = state.get("debts") if isinstance(state, dict) else None
    ledger = debts if isinstance(debts, list) else SAMPLE_LEDGER

    # Passing ``columns`` both selects and orders the displayed fields.
    return pd.DataFrame(ledger, columns=columns)
|
|
|
|
def get_metrics() -> Tuple[int, int, int, int]:
    """Return key metrics: active, cleared, persists, debt_score.

    The ledger comes from ``snapshot["state"]["debts"]`` of the backend
    snapshot when ``KNOWLEDGE_API_URL`` is set and the backend answers with
    a list at that path; otherwise ``SAMPLE_LEDGER`` is used.

    Returns:
        A 4-tuple ``(active, cleared, persists, debt_score)``:

        * ``active`` - rows whose status is ``"on_loan"`` or ``"persists"``.
        * ``cleared`` - rows whose status is ``"clear"`` or ``"owned"``.
        * ``persists`` - rows whose status is ``"persists"``.
        * ``debt_score`` - ``(active + persists) / len(ledger)`` as a whole
          percentage, capped at 100; ``0`` for an empty ledger.
    """
    backend_url = os.environ.get("KNOWLEDGE_API_URL", "").strip()
    snapshot = backend_snapshot(backend_url) if backend_url else None

    # Tolerate an unexpected payload shape instead of raising.
    state = snapshot.get("state") if isinstance(snapshot, dict) else None
    debts = state.get("debts") if isinstance(state, dict) else None
    ledger = debts if isinstance(debts, list) else SAMPLE_LEDGER

    statuses = [row.get("status") for row in ledger if isinstance(row, dict)]
    persists = statuses.count("persists")
    active = statuses.count("on_loan") + persists
    cleared = statuses.count("clear") + statuses.count("owned")

    if not ledger:
        return active, cleared, persists, 0

    # Persisting concepts are part of ``active`` and are added a second time
    # here, so the raw ratio can exceed 1. Cap it so the value stays a valid
    # percentage.
    debt_score = min(100, round((active + persists) / len(ledger) * 100))
    return active, cleared, persists, debt_score
|
|
|
|
| |
| |
| |
|
|
# Gradio layout: a header, a row of headline metrics and five tabs
# (Scout, Sage, Live Ledger, Architecture, For Kaggle Judges).
with gr.Blocks(
    title="KnowLedge | Gemma 4 for Good",
    theme=gr.themes.Soft(
        primary_hue="slate",
        secondary_hue="amber",
    ),
) as demo:

    gr.Markdown(
        """
        # 🦉 KnowLedge

        **A local-first learning verification system for the Gemma 4 for Good hackathon.**

        KnowLedge turns pasted AI-assisted work into a guided mastery loop:
        - **Scout** extracts concepts from text
        - **Sage** clears them through Socratic dialogue
        - **Lens** verifies understanding with integrity checks
        - **Reports** share only anonymous aggregates with instructors
        """
    )

    with gr.Row():
        # Computed once, while the layout is built; these cards are static
        # Markdown and are not refreshed afterwards. ``persists`` is
        # returned by get_metrics() but has no card of its own here.
        active, cleared, persists, debt_score = get_metrics()

        with gr.Column(scale=1):
            gr.Markdown(f"### 📌 Active\n\n**{active}** concepts on loan or persisting")
        with gr.Column(scale=1):
            gr.Markdown(f"### ✅ Cleared\n\n**{cleared}** owned concepts")
        with gr.Column(scale=1):
            gr.Markdown(f"### 🎯 Debt Score\n\n**{debt_score}%** (lower is better)")
        with gr.Column(scale=1):
            # Always the sample value, even when a live backend is configured.
            gr.Markdown(f"### 🛡️ Integrity\n\n**{SAMPLE_METRICS['spoof_attempts']}** spoof signals detected")

    with gr.Tabs():

        # --- Scout: keyword-based concept extraction demo ---
        with gr.TabItem("🔍 Scout — Extract Concepts"):
            gr.Markdown(
                """
                **Scout** automatically finds concepts inside pasted text. Try pasting an explanation or code snippet below.
                """
            )

            with gr.Row():
                with gr.Column(scale=2):
                    pasted_text = gr.Textbox(
                        label="Paste AI-assisted work or notes here",
                        placeholder="e.g., 'Recursion works by dividing a problem into smaller subproblems until reaching a base case...'",
                        lines=6,
                    )
                with gr.Column(scale=1):
                    scout_btn = gr.Button("🔍 Run Scout", size="lg")

            scout_output = gr.Markdown("Paste something to get started.")
            scout_status = gr.Textbox(label="Status", interactive=False, value="Ready.")

            scout_btn.click(
                scout_demo,
                inputs=[pasted_text],
                outputs=[scout_output, scout_status],
            )

        # --- Sage: simulated Socratic clearing session ---
        with gr.TabItem("🦉 Sage — Socratic Clearing"):
            gr.Markdown(
                """
                **Sage** guides you through a Socratic clearing session. Pick a concept and explain it in your own words.
                """
            )

            concept_input = gr.Textbox(
                label="Concept to clear",
                placeholder="e.g., 'Binary Search'",
                value="Recursion",
            )

            response_input = gr.Textbox(
                label="Your explanation",
                placeholder="Explain the concept in your own words. Be as detailed as you can.",
                lines=4,
            )

            sage_btn = gr.Button("💬 Get Sage Response", size="lg")
            sage_output = gr.Markdown()

            sage_btn.click(
                sage_demo,
                inputs=[concept_input, response_input],
                outputs=[sage_output],
            )

        # --- Ledger: table of tracked concepts with a manual refresh ---
        with gr.TabItem("📒 Live Ledger"):
            gr.Markdown(
                """
                This is your concept ledger. Every concept you study is tracked here with its status:
                - **on_loan**: You pasted it but haven't cleared it yet.
                - **clear**: You explained it to Sage and passed.
                - **persists**: Lens found gaps, needs another session.
                """
            )

            ledger_df = get_ledger_display()
            ledger_table = gr.Dataframe(
                value=ledger_df,
                interactive=False,
                wrap=True,
            )

            refresh_btn = gr.Button("🔄 Refresh Ledger")
            # The function takes no inputs, so it can be registered directly.
            refresh_btn.click(
                get_ledger_display,
                outputs=[ledger_table],
            )

        # --- Architecture: static documentation ---
        with gr.TabItem("🏗️ Architecture"):
            gr.Markdown(
                """
                ## System Design

                **Backend Stack:**
                - FastAPI for the web server
                - SQLite for concept ledger persistence
                - ChromaDB for curriculum context (RAG)
                - Ollama for local Gemma inference
                - Privacy-preserving sync with concept-level aggregates only

                **Key Features:**
                - Offline-first (no cloud dependency)
                - Session fingerprinting to detect gaming behavior
                - Anti-spoof scoring on Lens uploads
                - No student identifiers in instructor reports

                ## Quick Local Setup

                ```bash
                python3 -m venv .venv
                source .venv/bin/activate
                pip install -r requirements.txt

                # Terminal 1: Start Ollama
                ollama serve

                # Terminal 2: Start the backend
                python -m uvicorn knowledge.main:app --host 127.0.0.1 --port 8000

                # Optional: Load curriculum material
                python -m knowledge.vectorize path/to/course.pdf
                ```

                ## Deploy on Hugging Face Spaces

                1. Create a new Space with **Gradio** SDK
                2. Push this repo to the Space
                3. Set `KNOWLEDGE_API_URL` environment variable (optional, for live backend)
                4. Share the Space URL as your Kaggle demo link
                """
            )

        # --- Pitch: static text for hackathon judges ---
        with gr.TabItem("🎯 For Kaggle Judges"):
            gr.Markdown(
                """
                ## What Makes KnowLedge Different

                Most AI tutoring tools are **answer machines**. KnowLedge is a **verification system**.

                ### The Problem
                - Students copy AI-generated code without understanding
                - Traditional quizzes can be gamed with a second AI
                - Instructors have no way to detect this pattern

                ### The KnowLedge Solution
                - **Scout** logs every concept you borrow from AI
                - **Sage** forces you to explain it yourself before you own it
                - **Lens** checks handwritten work for logic gaps
                - **Integrity** fingerprints your session to catch repeat gaming
                - **Sync** gives instructors only concept-level aggregates (privacy-first)

                ### Why This Matters
                - Fixes a **real problem** in modern education
                - Uses **Gemma 4** for both extraction and dialogue
                - Runs **fully offline** for maximum privacy
                - Provides a **zero-setup** public demo (this Space)

                ### Key Metrics
                - **Debt Score**: How much of your work is still "borrowed"
                - **Spoof Attempts**: Session fingerprinting detects copy-paste patterns
                - **Integrity Signals**: Anti-gaming scoring from Lens

                This is what judges should look for. Not an answering machine. A **learning enforcer**.
                """
            )


if __name__ == "__main__":
    demo.launch()
|
|