Spaces:

st192011
/

Janus_Interface

Running

App Files Files Community

st192011 committed 28 days ago

Commit

37bcc3a

verified ·

1 Parent(s): c88a09a

Create app.py

Browse files

Files changed (1) hide show

app.py +308 -0

app.py ADDED Viewed

	@@ -0,0 +1,308 @@

+import gradio as gr
+import torch
+import json
+import os
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
+from huggingface_hub import InferenceClient
+# ==============================================================================
+# 1. CONFIGURATION
+# ==============================================================================
+# NOTE: You must set 'HF_TOKEN' in your Hugging Face Space Secrets!
+HF_TOKEN = os.getenv("HF_TOKEN")
+PROJECT_TITLE = "The Janus Interface: Semantic Decoupling Architecture"
+# Models
+# We use the official Microsoft repo for CPU compatibility
+BASE_MODEL_ID = "microsoft/Phi-3.5-mini-instruct"
+ADAPTER_ID = "st192011/janus-gold-lora"
+CLOUD_MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
+# ==============================================================================
+# 2. ENGINE INITIALIZATION (CPU Optimized)
+# ==============================================================================
+print("⏳ Initializing Neural Backbone (CPU Mode)...")
+try:
+    # Load Tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
+    # Load Base Model (bfloat16 saves RAM on Free Tier Spaces)
+    base_model = AutoModelForCausalLM.from_pretrained(
+        BASE_MODEL_ID,
+        torch_dtype=torch.bfloat16,
+        device_map="cpu",
+        trust_remote_code=True
+    )
+    # Load Adapter
+    print(f"⏳ Mounting Janus Adapter ({ADAPTER_ID})...")
+    model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
+    model.eval() # Set to inference mode
+    print("✅ System Online.")
+except Exception as e:
+    print(f"❌ Error loading model: {e}")
+    raise e
+# Cloud Client
+hf_client = InferenceClient(model=CLOUD_MODEL_ID, token=HF_TOKEN)
+# ==============================================================================
+# 3. KERNEL LOGIC
+# ==============================================================================
+def clean_output(text):
+    """Sanitizes output to prevent chain-reaction failures."""
+    # Remove special tokens
+    clean = text.replace("<|end|>", "").replace("<|endoftext|>", "")
+    # Remove conversational filler lines
+    if "Output:" in clean: clean = clean.split("Output:")[-1]
+    lines = clean.split('\n')
+    # Keep lines that look like protocol code or normal text, remove "Here is..."
+    valid_lines = [line for line in lines if "Note" not in line and "Here is" not in line]
+    return " ".join(valid_lines).strip()
+def kernel_scout(raw_input):
+    """Mode A: Local Logic Extraction"""
+    try:
+        prompt = f"""<|system|>
+SYSTEM_ROLE: Janus Extractor.
+TASK: Refactor clinical notes into JanusScript Logic.
+SYNTAX: Object.action(params) -> Result.
+OBJECTS: Hx, Sx, Dx, Tx, Lab, Crs, Plan.
+CONSTRAINTS: No PII. Use relative time (Day1, Day2).
+<|end|>
+<|user|>
+RAW NOTE:
+{raw_input}<|end|>
+<|assistant|>"""
+        inputs = tokenizer(prompt, return_tensors="pt")
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=256,
+                temperature=0.1,
+                do_sample=True
+            )
+        text = tokenizer.batch_decode(outputs)[0]
+        raw_output = text.split("<|assistant|>")[-1]
+        return clean_output(raw_output)
+    except Exception as e: return f"Error: {str(e)}"
+def kernel_cloud_expert(scenario_prompt):
+    """Mode B: Cloud Bridge"""
+    try:
+        sys_prompt = """You are a Clinical Logic Engine.
+Task: Convert the scenario into 'JanusScript' code.
+Syntax: Object.action(parameter);
+Objects: Dx, Sx, Tx, Lab, Plan.
+Rules: No PII. Use PascalCase.
+Example:
+Input: Pt has pneumonia. Given antibiotics.
+Output: Dx(Pneumonia); Sx(Fever+Cough); Tx(Meds).action(Antibiotics); Plan(Discharge.Home);"""
+        messages = [
+            {"role": "system", "content": sys_prompt},
+            {"role": "user", "content": f"Input: {scenario_prompt}"}
+        ]
+        response = hf_client.chat_completion(messages, max_tokens=512, temperature=0.1)
+        return clean_output(response.choices[0].message.content)
+    except Exception as e: return f"API Error: {str(e)}"
+def kernel_vault(protocol, secure_json):
+    """Shared Terminal: Reconstruction"""
+    try:
+        try: db_str = json.dumps(json.loads(secure_json), ensure_ascii=False)
+        except: return "❌ Error: Invalid JSON."
+        prompt = f"""<|system|>
+SYSTEM_ROLE: Janus Constructor.
+TASK: Interpret JanusScript and PrivateDB to write Discharge Summary.
+TEMPLATE: Header -> Dates -> History -> Hospital Course -> Plan.
+<|end|>
+<|user|>
+PROTOCOL:
+{protocol}
+PRIVATE_DB:
+{db_str}<|end|>
+<|assistant|>"""
+        inputs = tokenizer(prompt, return_tensors="pt")
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=1024,
+                temperature=0.1,
+                repetition_penalty=1.05,
+                do_sample=True
+            )
+        text = tokenizer.batch_decode(outputs)[0]
+        doc = text.split("<|assistant|>")[-1].replace("<|end|>", "").replace("<|endoftext|>", "").strip()
+        return doc
+    except Exception as e: return f"Error: {str(e)}"
+# ==============================================================================
+# 4. DEMO SAMPLES
+# ==============================================================================
+# Case 1: Appendicitis (Local)
+# Pre-filled text for the "Raw Sensitive Note" box on the Mode A tab.
+sample_note = """Pt ID 8899-A.
+History: 28yo male presented with RLQ pain & fever.
+Workup: CT scan confirmed acute appy.
+Course: Taken to OR for lap appendectomy. Uncomplicated. Tolerated diet next day.
+Plan: Discharge home. Pain controlled on oral meds."""
+# Pre-filled JSON for the "Secure Identity Record" box on the Mode A tab.
+# It is kept as a string because kernel_vault parses it with json.loads.
+sample_db = """{
+    "pt_name": "Elias Thorne",
+    "pt_mrn": "8899-A",
+    "pt_dob": "1995-03-12",
+    "pt_sex": "M",
+    "adm_date": "2025-02-10",
+    "dis_date": "2025-02-12",
+    "prov_attending": "Dr. Wu",
+    "prov_specialty": "General Surgery"
+}"""
+# Case 2: Sepsis (Cloud)
+# Pre-filled text for the "Clinical Scenario (Anonymized)" box on the Mode B tab.
+sample_scenario = """Patient admitted for Urosepsis.
+Culture: E. coli resistant to Cipro.
+Treatment: Started on Zosyn IV. Transferred to ICU for one day for hypotension.
+Transition: Switched to oral Augmentin.
+Outcome: Stable, Afebrile. Discharge to finish 14 day course."""
+# Pre-filled JSON for the "Secure Identity Record" box on the Mode B tab.
+sample_db_cloud = """{
+    "pt_name": "Sarah Connor",
+    "pt_mrn": "SKY-NET",
+    "pt_dob": "1965-05-10",
+    "pt_sex": "F",
+    "adm_date": "2025-12-01",
+    "dis_date": "2025-12-05",
+    "prov_attending": "Dr. Silberman",
+    "prov_specialty": "Internal Medicine"
+}"""
+# ==============================================================================
+# 5. TECHNICAL REPORT
+# ==============================================================================
+# Markdown source shown on the "Technical Report" tab; it is passed unchanged
+# to gr.Markdown when the UI is built.
+report_md = """
+# 🏛️ The Janus Interface: Research & Technical Analysis
+**Project Status:** Research Prototype v2.0 (Gold Standard)
+---
+### 1. Research Motivation: The Privacy-Utility Paradox
+In regulated domains (Healthcare, Legal, Finance), Generative AI adoption is stalled by a fundamental conflict:
+*   **Utility:** Large Cloud Models (GPT-4, Claude) offer superior reasoning but require sending data off-premise.
+*   **Privacy:** Local Small Models (SLMs) ensure data sovereignty but often lack deep domain knowledge.
+*   **The Solution:** **Semantic Decoupling**. We propose separating the **"Logic"** of a case from the **"Identity"** of the subject.
+### 2. Architectural Design: The Twin-Protocol
+The system utilizes a **Multi-Task Adapter** trained to switch between two distinct cognitive modes based on the System Prompt.
+#### **Mode A: The Scout (Logic Extractor)**
+*   **Function:** Reads raw, messy clinical notes.
+*   **Constraint:** Trained via Loss Masking to extract *only* clinical entities (`Dx`, `Tx`, `Plan`) into a sanitized code string called **JanusScript**.
+*   **Security:** It treats names, dates, and locations as noise to be discarded.
+#### **Mode B: The Cloud Bridge (Knowledge Injection)**
+*   **Function:** Allows an external Cloud LLM to reason about a generic, anonymized scenario.
+*   **Innovation:** The Cloud Model generates the **JanusScript** code. This code acts as a firewall—no PII ever leaves the local environment, but the *intelligence* of the cloud is captured in the script.
+#### **The Vault (Reconstructor)**
+*   **Function:** A secure, offline engine that accepts the JanusScript and a Local SQL Database record.
+*   **Output:** It merges the abstract logic with the concrete identity to generate the final, human-readable document.
+---
+### 3. Data Engineering: The "Gold Standard" Pipeline
+To achieve high fidelity without using private patient data, we developed a **Synthesized Data Pipeline**:
+1.  **Synthesis:** We generated **306 high-quality clinical scenarios** using Large Language Models (LLMs).
+2.  **Alignment:** Unlike previous iterations where headers were random, this dataset ensured strict mathematical alignment between the Identity Header (Age/Sex) and the Clinical Narrative.
+3.  **Result:** This eliminated the "hallucination" issues seen in earlier tests where the model would confuse patient gender or age due to conflicting training signals.
+### 4. Training Methodology
+*   **Base Model:** Microsoft Phi-3.5-mini-instruct (3.8B Parameters).
+*   **Framework:** **Unsloth** (Optimized QLoRA).
+*   **Technique:** **DoRA (Weight-Decomposed Low-Rank Adaptation)**.
+    *   *Why DoRA?* Standard LoRA struggles with strict syntax/coding tasks. DoRA updates both magnitude and direction vectors, allowing the model to learn the strict `JanusScript` grammar effectively.
+*   **Loss Masking:** We used `train_on_responses_only`. The model was **never** trained on the input text, only on the output. This prevents the model from memorizing patient PII from the training set.
+*   **Hyperparameters:** Rank 16, Alpha 16, Learning Rate 2e-4, **2 Epochs** (approx 78 steps used for final checkpoint).
+### 5. Results & Conclusion
+*   **Zero-Trust Validation:** The "Vault" successfully reconstructs documents using *only* the database for identity.
+*   **Semantic Expansion:** The model demonstrates the ability to take a concise code (`Dx(Pneumonia)`) and expand it into fluent medical narrative ("Patient presented with symptoms consistent with Pneumonia...").
+"""
+# ==============================================================================
+# 6. LAUNCHER
+# ==============================================================================
+# Build the three-tab UI. Components are laid out in the order they are
+# created inside the nested Row/Column contexts, so statement order matters.
+with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald"), title=PROJECT_TITLE) as demo:
+    gr.Markdown(f"# 🏛️ {PROJECT_TITLE}")
+    with gr.Tabs():
+        # --- TAB 1: local extraction (kernel_scout) followed by local reconstruction ---
+        with gr.TabItem("🛡️ Mode A: Local Air-Gap"):
+            # Top row: raw note on the left, extracted protocol on the right.
+            with gr.Row():
+                with gr.Column(scale=1):
+                    inp_a = gr.Textbox(label="Raw Sensitive Note", lines=12, value=sample_note)
+                    btn_a = gr.Button("Execute Scout (Local) ➔", variant="primary")
+                with gr.Column(scale=1):
+                    # interactive=True; the box's current text is what the Vault button reads.
+                    out_proto_a = gr.Textbox(label="JanusScript Protocol", lines=6, interactive=True)
+            gr.Markdown("---")
+            # Bottom row: identity record on the left, reconstructed document on the right.
+            with gr.Row():
+                with gr.Column(scale=1):
+                    inp_db_a = gr.Textbox(label="Secure Identity Record", lines=12, value=sample_db)
+                    btn_final_a = gr.Button("Execute Vault (Local) ➔", variant="secondary")
+                with gr.Column(scale=1):
+                    out_final_a = gr.Textbox(label="Output: Reconstructed Document", lines=25)
+            # Wiring: note -> kernel_scout -> protocol box; protocol + record -> kernel_vault -> document.
+            btn_a.click(kernel_scout, inputs=inp_a, outputs=out_proto_a)
+            btn_final_a.click(kernel_vault, inputs=[out_proto_a, inp_db_a], outputs=out_final_a)
+        # --- TAB 2: cloud extraction (kernel_cloud_expert) followed by local reconstruction ---
+        with gr.TabItem("🧠 Mode B: Cloud Bridge"):
+            # Same layout as tab 1, with the scenario box feeding the cloud kernel.
+            with gr.Row():
+                with gr.Column(scale=1):
+                    inp_b = gr.Textbox(label="Clinical Scenario (Anonymized)", lines=12, value=sample_scenario)
+                    btn_b = gr.Button("Execute Cloud API (Llama-3) ➔", variant="primary")
+                with gr.Column(scale=1):
+                    out_proto_b = gr.Textbox(label="JanusScript Protocol", lines=6, interactive=True)
+            gr.Markdown("---")
+            with gr.Row():
+                with gr.Column(scale=1):
+                    inp_db_b = gr.Textbox(label="Secure Identity Record", lines=12, value=sample_db_cloud)
+                    btn_final_b = gr.Button("Execute Vault (Local) ➔", variant="secondary")
+                with gr.Column(scale=1):
+                    out_final_b = gr.Textbox(label="Output: Reconstructed Document", lines=25)
+            # Wiring: scenario -> kernel_cloud_expert -> protocol box; protocol + record -> kernel_vault -> document.
+            btn_b.click(kernel_cloud_expert, inputs=inp_b, outputs=out_proto_b)
+            btn_final_b.click(kernel_vault, inputs=[out_proto_b, inp_db_b], outputs=out_final_b)
+        # --- TAB 3: static markdown report ---
+        with gr.TabItem("📄 Technical Report"):
+            gr.Markdown(report_md)
+# Runs at module load (there is no __main__ guard).
+demo.launch()