# Source: Hugging Face Space ashutoshzade/tensor-runtime-lab (Space status: Build error)
# File size: 16,991 Bytes · revision 9935bd7

"""
TENSOR Runtime Lab — HuggingFace Space
Transformer-Native Computational Paradigm Research Demo
Author: ashutoshzade
"""

import gradio as gr
import anthropic
import json
import time
import os
import pandas as pd
import numpy as np
from datetime import datetime

from benchmark import run_icu_benchmark, get_benchmark_summary
from latent_inspector import get_attention_summary, get_wolfram_verification

# ---------------------------------------------------------------------------
# Anthropic client — set ANTHROPIC_API_KEY in HF Space secrets
# ---------------------------------------------------------------------------
def get_client():
    """Build an Anthropic client from the ``ANTHROPIC_API_KEY`` environment variable.

    Returns:
        An ``anthropic.Anthropic`` client configured with the key.

    Raises:
        ValueError: If the variable is unset, empty, or contains only whitespace.
    """
    # Secrets pasted into a settings form often pick up a trailing newline or
    # stray spaces; strip them so a blank value is rejected here rather than
    # being forwarded as a malformed credential.
    api_key = os.environ.get("ANTHROPIC_API_KEY", "").strip()
    if not api_key:
        raise ValueError("ANTHROPIC_API_KEY not set. Add it in Space Settings → Secrets.")
    return anthropic.Anthropic(api_key=api_key)


# ---------------------------------------------------------------------------
# TAB 1 — TENSOR Runtime: algorithm selection + implementation
# ---------------------------------------------------------------------------
# System prompt for Tab 1; run_tensor_runtime passes it as `system=` on its
# Messages API call. It instructs the model to reply with one JSON object.
# run_tensor_runtime reads the keys "algorithm", "rationale", "code",
# "confidence", "caveats", "complexity" and "alternatives" from that object,
# so the key names below must stay in sync with that function.
# NOTE(review): step 1 gives "XGBoost classifier" as an example answer while
# step 3 restricts generated code to sklearn/numpy/pandas — confirm whether
# that combination is intended.
RUNTIME_SYSTEM = """You are the TENSOR Runtime — a transformer-native computational engine.

When given a problem description and sample data, you:
1. SELECT the single best algorithm for the task (be specific: e.g. "XGBoost classifier" not just "tree model")
2. STATE WHY in one sentence referencing the data characteristics
3. IMPLEMENT a clean, runnable Python snippet (use sklearn, numpy, pandas only)
4. RATE your confidence 1-10 and explain any caveats

Respond in this exact JSON structure:
{
  "algorithm": "<name>",
  "rationale": "<one sentence>",
  "code": "<python snippet, properly escaped>",
  "confidence": <int 1-10>,
  "caveats": "<any important limitations or assumptions>",
  "complexity": "<time complexity of the algorithm>",
  "alternatives": ["<alt1>", "<alt2>"]
}

Return ONLY the JSON — no markdown, no preamble.
"""

# Problem templates offered in the Tab 1 dropdown. Each entry maps a display
# name to a problem statement and a small CSV sample (header row + 5 data rows).
# The "Custom problem" entry is intentionally empty: selecting it makes
# run_tensor_runtime use the free-text inputs instead. Insertion order is the
# order shown in the dropdown.
EXAMPLE_PROBLEMS = {
    "ICU deterioration (vitals time-series)": {
        "problem": (
            "Predict patient deterioration in the next 6 hours using ICU vital sign time-series. "
            "Binary classification: deteriorate vs stable. "
            "Need high recall to avoid missing critical events."
        ),
        "data": "\n".join((
            "heart_rate,bp_systolic,spo2,resp_rate,temp_c,label",
            "88,122,97,18,37.1,0",
            "102,108,94,22,37.8,0",
            "118,96,91,26,38.2,1",
            "95,114,96,19,37.3,0",
            "130,88,88,30,38.9,1",
        )),
    },
    "Time-series anomaly detection": {
        "problem": (
            "Detect anomalous sensor readings in a manufacturing line. "
            "Unsupervised — no labels available. "
            "Need to flag the top 5% of unusual readings for human review."
        ),
        "data": "\n".join((
            "timestamp,sensor_a,sensor_b,sensor_c,vibration",
            "1,0.82,1.1,0.9,0.3",
            "2,0.79,1.2,0.88,0.31",
            "3,0.81,1.09,0.91,0.29",
            "4,3.42,0.5,2.1,1.8",
            "5,0.80,1.11,0.90,0.30",
        )),
    },
    "Patient readmission (tabular, mixed types)": {
        "problem": (
            "Predict 30-day hospital readmission from structured EHR discharge data. "
            "Mix of numeric and categorical features. "
            "Dataset is imbalanced (8% positive class). "
            "Interpretability matters for clinical staff."
        ),
        "data": "\n".join((
            "age,gender,diagnosis_code,num_procedures,insurance,prior_admissions,readmitted",
            "67,M,I50.9,3,Medicare,2,1",
            "45,F,J18.9,1,Private,0,0",
            "72,M,I21.0,5,Medicare,4,1",
            "38,F,K35.80,2,Medicaid,1,0",
            "81,M,I50.9,2,Medicare,6,1",
        )),
    },
    "Custom problem": {
        "problem": "",
        "data": "",
    },
}

def _parse_runtime_json(raw):
    """Return the JSON object embedded in *raw*, or ``None`` when there is none.

    The whole text is tried first; if that fails (or yields something that is
    not an object) the outermost ``{...}`` span is tried, which covers replies
    wrapped in markdown fences or surrounded by prose.
    """
    import re

    candidates = [raw]
    brace_span = re.search(r'\{.*\}', raw, re.DOTALL)
    if brace_span:
        candidates.append(brace_span.group())

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # Only a JSON object can be read by key; a bare list/number/string is unusable.
        if isinstance(parsed, dict):
            return parsed
    return None


def _confidence_as_int(value):
    """Coerce a model-reported confidence to an int clamped to 0..10 (0 if unusable)."""
    try:
        score = int(float(value))
    except (TypeError, ValueError, OverflowError):
        return 0
    return max(0, min(10, score))


def run_tensor_runtime(problem_template, custom_problem, custom_data, api_key_override):
    """Core H1 experiment: transformer selects + implements algorithm.

    Args:
        problem_template: Key of ``EXAMPLE_PROBLEMS``. ``"Custom problem"`` (or a
            value that is not a known template) means the free-text fields are used.
        custom_problem: Free-text problem description for the custom case.
        custom_data: Optional CSV sample accompanying the custom problem.
        api_key_override: Optional API key; when blank the ``ANTHROPIC_API_KEY``
            environment variable is used.

    Returns:
        A 4-tuple of strings ``(algorithm_markdown, code, log_json, h1_markdown)``
        matching the four Tab 1 output components. When the input is incomplete,
        no key is available, the reply cannot be parsed, or the API call fails,
        the first element carries a "⚠️ ..." message and the rest are empty.
    """
    use_template = problem_template != "Custom problem" and problem_template in EXAMPLE_PROBLEMS
    if use_template:
        template = EXAMPLE_PROBLEMS[problem_template]
        problem = template["problem"]
        data = template["data"]
    else:
        # Text inputs may arrive as None (e.g. programmatic calls); treat that as empty.
        problem = (custom_problem or "").strip()
        data = (custom_data or "").strip()

    if not problem:
        return "⚠️ Please describe your problem.", "", "", ""

    sample = data if data else "(no data provided — infer from problem description)"
    prompt = f"""PROBLEM STATEMENT:
{problem}

SAMPLE DATA (CSV):
{sample}

Select the best algorithm, implement it, and return the JSON response."""

    client_key = (api_key_override or "").strip() or os.environ.get("ANTHROPIC_API_KEY", "").strip()
    if not client_key:
        return "⚠️ No API key. Set ANTHROPIC_API_KEY in Space secrets or enter it above.", "", "", ""

    model_name = "claude-sonnet-4-20250514"

    # This is a UI callback: any failure below (network, auth, unexpected reply
    # shape) is reported in the first output instead of being raised.
    try:
        client = anthropic.Anthropic(api_key=client_key)

        start_time = time.time()
        message = client.messages.create(
            model=model_name,
            max_tokens=1500,
            system=RUNTIME_SYSTEM,
            messages=[{"role": "user", "content": prompt}]
        )
        elapsed = time.time() - start_time

        # Join every text block so an empty or multi-part reply does not raise.
        raw = "".join(getattr(part, "text", "") or "" for part in message.content).strip()

        result = _parse_runtime_json(raw)
        if result is None:
            return f"⚠️ Parse error. Raw response:\n{raw}", "", "", ""

        # The model may return confidence as a float/string; normalise before
        # using it as a repeat count for the star display.
        confidence = _confidence_as_int(result.get('confidence'))

        alternatives = result.get('alternatives') or []
        if not isinstance(alternatives, (list, tuple)):
            alternatives = [alternatives]
        alternatives_text = ', '.join(str(alt) for alt in alternatives)

        algo_display = f"""## 🔬 TENSOR Selected: `{result.get('algorithm', 'Unknown')}`

**Confidence:** {'⭐' * confidence} {confidence}/10

**Rationale:** {result.get('rationale', '')}

**Time complexity:** {result.get('complexity', 'N/A')}

**Caveats:** {result.get('caveats', 'None noted')}

**Alternatives considered:** {alternatives_text}

---
*Inference time: {elapsed:.2f}s | Model: {model_name}*
"""

        code_display = str(result.get('code') or '# No code generated')

        # The log keeps the confidence exactly as the model reported it.
        log_entry = json.dumps({
            "timestamp": datetime.utcnow().isoformat(),
            "problem_type": problem_template,
            "selected_algorithm": result.get('algorithm'),
            "confidence": result.get('confidence'),
            "inference_time_s": round(elapsed, 3)
        }, indent=2)

        h1_evidence = f"""### H1 Evidence Log
This call demonstrates the transformer:
- **Selected** an algorithm without being given choices
- **Justified** selection based on data characteristics  
- **Implemented** runnable code from intent alone
- **Quantified** its own uncertainty (confidence {confidence}/10)

This is the core TENSOR claim: replacing the algorithm-selection-implementation workflow with a single transformer call.
"""

        return algo_display, code_display, log_entry, h1_evidence

    except Exception as e:
        return f"⚠️ Error: {str(e)}", "", "", ""


# ---------------------------------------------------------------------------
# TAB 2 — ICU Benchmark (H2: efficiency)
# ---------------------------------------------------------------------------
def run_benchmark_tab(n_patients, api_key_override):
    """H2 experiment: TENSOR vs traditional pipeline on synthetic ICU data.

    Picks the API key (the override when non-blank, otherwise the
    ``ANTHROPIC_API_KEY`` environment variable), runs ``run_icu_benchmark``
    for the requested cohort size, and returns the four summary entries in
    the order the Tab 2 outputs are wired: comparison table, metrics plot,
    cost analysis, H2 conclusion.
    """
    override = api_key_override.strip()
    api_key = override or os.environ.get("ANTHROPIC_API_KEY", "")

    cohort_size = int(n_patients)
    benchmark_results = run_icu_benchmark(n_patients=cohort_size, api_key=api_key)
    summary = get_benchmark_summary(benchmark_results)

    output_keys = ("comparison_table", "metrics_plot", "cost_analysis", "h2_conclusion")
    return tuple(summary[key] for key in output_keys)


# ---------------------------------------------------------------------------
# TAB 3 — Latent Inspector (H3: verification + transparency)
# ---------------------------------------------------------------------------
def run_latent_inspection(patient_data, api_key_override):
    """Show attention patterns and Wolfram verification for a prediction.

    Returns a pair ``(attention_html, wolfram_log)``: the result of
    ``get_attention_summary`` (called with the resolved API key) followed by
    the result of ``get_wolfram_verification`` (called with the data only).
    """
    override = api_key_override.strip()
    api_key = override or os.environ.get("ANTHROPIC_API_KEY", "")

    return (
        get_attention_summary(patient_data, api_key=api_key),
        get_wolfram_verification(patient_data),
    )


# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
# Extra stylesheet handed to gr.Blocks(css=...) below.
# NOTE(review): no component in this file sets elem_classes, so the
# .result-box and .highlight rules do not appear to be applied by the code
# visible here — confirm before relying on them.
CUSTOM_CSS = """
.tab-nav button { font-weight: 600; }
.result-box { font-family: monospace; }
.highlight { background: #f0f4ff; border-left: 4px solid #4f46e5; padding: 12px; border-radius: 4px; }
"""

# Markdown banner rendered at the very top of the app, above the API-key
# accordion and the tabs.
HEADER_MD = """# 🧠 TENSOR Runtime Lab
### Transformer-Native Computational Paradigm Research
**Hypothesis:** A transformer with a human-readable interface can replace the traditional algorithm-selection → implementation → test workflow for a broad class of computational problems.

*Research by [ashutoshzade](https://huggingface.co/ashutoshzade) | Paper submitted June 2nd, 2026*

---
"""

# Top-level app layout: header, a shared API-key field, and four tabs
# (H1 runtime, H2 benchmark, H3 latent inspector, About). Each interactive tab
# declares its components and wires its button inside its own `with gr.Tab`.
with gr.Blocks(
    title="TENSOR Runtime Lab",
    css=CUSTOM_CSS,
    theme=gr.themes.Soft(primary_hue="indigo")
) as demo:

    gr.Markdown(HEADER_MD)

    # Shared API key (optional override for local testing).
    # The same textbox is passed as an input to every tab's callback.
    with gr.Accordion("🔑 API Key (optional — set in Space Secrets for production)", open=False):
        api_key_input = gr.Textbox(
            label="Anthropic API Key override",
            placeholder="sk-ant-... (leave blank if key is set in Space Secrets)",
            type="password",
            scale=1
        )

    with gr.Tabs():

        # ── TAB 1: TENSOR Runtime ──────────────────────────────────────────
        with gr.Tab("⚡ H1 — Runtime (Algorithm Selection)"):
            gr.Markdown("""
### Hypothesis 1
> *Can a transformer replace the traditional: problem → algorithm selection → implementation → test workflow?*

Enter a problem description and sample data. TENSOR selects the algorithm, explains why, and writes the code.
""")
            with gr.Row():
                with gr.Column(scale=1):
                    problem_dropdown = gr.Dropdown(
                        choices=list(EXAMPLE_PROBLEMS.keys()),
                        value="ICU deterioration (vitals time-series)",
                        label="Problem template"
                    )
                    # The two custom inputs start hidden; toggle_custom reveals
                    # them when "Custom problem" is selected.
                    custom_problem_box = gr.Textbox(
                        label="Custom problem description",
                        placeholder="Describe your ML problem, constraints, and any domain knowledge...",
                        lines=4,
                        visible=False
                    )
                    custom_data_box = gr.Textbox(
                        label="Sample data (CSV format, 5-10 rows)",
                        placeholder="col1,col2,label\n...",
                        lines=6,
                        visible=False
                    )
                    run_runtime_btn = gr.Button("▶ Run TENSOR Runtime", variant="primary")

                with gr.Column(scale=2):
                    algo_output = gr.Markdown(label="Algorithm selection + rationale")
                    code_output = gr.Code(language="python", label="Generated implementation")

            with gr.Row():
                log_output = gr.Code(language="json", label="Runtime log (H1 evidence)")
                h1_evidence_output = gr.Markdown(label="Research note")

            def toggle_custom(choice):
                """Show both custom input boxes only for the "Custom problem" template."""
                visible = choice == "Custom problem"
                return gr.update(visible=visible), gr.update(visible=visible)

            problem_dropdown.change(toggle_custom, problem_dropdown, [custom_problem_box, custom_data_box])

            # Output order must match the 4-tuple returned by run_tensor_runtime.
            run_runtime_btn.click(
                run_tensor_runtime,
                inputs=[problem_dropdown, custom_problem_box, custom_data_box, api_key_input],
                outputs=[algo_output, code_output, log_output, h1_evidence_output]
            )

        # ── TAB 2: ICU Benchmark ───────────────────────────────────────────
        with gr.Tab("📊 H2 — ICU Benchmark (Efficiency)"):
            gr.Markdown("""
### Hypothesis 2
> *Is transformer-native computation efficient vs. traditional ML pipelines?*

Runs TENSOR against a hand-tuned XGBoost baseline on synthetic ICU deterioration data.
Measures AUC-ROC, AUPRC, latency, and engineering cost.
""")
            with gr.Row():
                n_patients_slider = gr.Slider(
                    minimum=20, maximum=200, value=50, step=10,
                    label="Synthetic patient cohort size"
                )
                run_benchmark_btn = gr.Button("▶ Run Benchmark", variant="primary")

            comparison_table = gr.Dataframe(label="TENSOR vs. XGBoost baseline — metrics comparison")

            with gr.Row():
                metrics_plot = gr.Plot(label="Performance comparison")
                cost_analysis = gr.Markdown(label="Engineering cost analysis (H3 preview)")

            h2_conclusion = gr.Markdown(label="H2 research conclusion")

            # Output order must match the tuple returned by run_benchmark_tab.
            run_benchmark_btn.click(
                run_benchmark_tab,
                inputs=[n_patients_slider, api_key_input],
                outputs=[comparison_table, metrics_plot, cost_analysis, h2_conclusion]
            )

        # ── TAB 3: Latent Inspector ────────────────────────────────────────
        with gr.Tab("🔍 H3 — Latent Inspector (Verification)"):
            gr.Markdown("""
### Hypothesis 3 — Transparency & Verification
> *Can we inspect and verify transformer reasoning for trust in high-stakes domains?*

Paste ICU patient vitals. TENSOR predicts deterioration, explains which temporal features drove the decision, and runs symbolic verification.
""")
            patient_input = gr.Textbox(
                label="Patient vitals sequence (CSV)",
                value="hour,heart_rate,bp_systolic,spo2,resp_rate,temp_c\n0,78,120,98,16,36.9\n1,82,118,97,17,37.0\n2,91,112,95,19,37.3\n3,105,102,92,23,37.8\n4,118,94,89,27,38.2",
                lines=8
            )
            run_inspect_btn = gr.Button("▶ Inspect Latent Reasoning", variant="primary")

            with gr.Row():
                attention_output = gr.HTML(label="Temporal attention weights (which timesteps mattered)")
                wolfram_output = gr.Textbox(
                    label="Symbolic verification log (Wolfram-style constraint checks)",
                    lines=15
                )

            # Wired inside the tab, like Tabs 1 and 2. Output order must match
            # the (attention_html, wolfram_log) pair from run_latent_inspection.
            run_inspect_btn.click(
                run_latent_inspection,
                inputs=[patient_input, api_key_input],
                outputs=[attention_output, wolfram_output]
            )

        # ── TAB 4: About / Paper ──────────────────────────────────────────
        with gr.Tab("📄 About TENSOR"):
            gr.Markdown("""
## TENSOR — Temporal Engine for Neural Search & Optimization Runtime

### Core Thesis
Transformer-native computational paradigms may absorb significant portions of forecasting, search, optimization, routing, planning, and temporal reasoning systems into unified tensor-based runtimes.

### Three Hypotheses Tested Here

| | Hypothesis | Demonstration |
|---|---|---|
| **H1** | Transformer can replace algorithm selection + implementation workflow | Tab 1: Runtime |
| **H2** | Transformer-native approach is efficient vs. hand-crafted pipelines | Tab 2: ICU Benchmark |
| **H3** | This can scale economically and be verified symbolically | Tab 3: Latent Inspector |

### Architecture
```
User Intent + Data
       ↓
TENSOR Runtime (Claude Sonnet)
       ↓
Latent Computational Operations
       ↓
Symbolic Verification Layer (Wolfram-style)
       ↓
Explainable Output + Evidence Log
```

### Primary Benchmark
**ICU Deterioration Forecasting** — chosen because it requires:
- Temporal reasoning over multivariate sequences
- Anomaly detection under noise
- High-recall classification (missing a deterioration = harm)
- Interpretable decisions (clinical trust requirement)

### Verification Philosophy
All TENSOR predictions are passed through deterministic constraint checks:
- Vital sign range validation (physiologically plausible?)
- Trend consistency (monotonic deterioration vs. spike?)
- Confidence calibration (does stated confidence match prediction error rate?)

### Citation
```
@misc{tensor2026,
  title={TENSOR: Transformer-Native Computational Paradigm},
  author={Zade, Ashutosh},
  year={2026},
  url={https://huggingface.co/spaces/ashutoshzade/tensor-runtime-lab}
}
```

### Links
- 🤗 [HuggingFace Profile](https://huggingface.co/ashutoshzade)
- 📧 Paper submission: June 2nd, 2026
""")

# Start the Gradio server when the module is executed.
demo.launch()