st192011 committed on
Commit
8e4c007
·
verified ·
1 Parent(s): f30d567

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -40
app.py CHANGED
@@ -4,64 +4,50 @@ import json
4
  import os
5
  from transformers import AutoModelForCausalLM, AutoTokenizer
6
  from peft import PeftModel
7
- from huggingface_hub import InferenceClient, snapshot_download
8
 
9
  # ==============================================================================
10
  # 1. CONFIGURATION
11
  # ==============================================================================
12
- HF_TOKEN = os.getenv("HF_TOKEN")
 
 
13
  PROJECT_TITLE = "The Janus Interface: Semantic Decoupling Architecture"
14
 
15
  # Models
 
16
  BASE_MODEL_ID = "microsoft/Phi-3.5-mini-instruct"
17
  ADAPTER_ID = "st192011/janus-gold-lora"
18
  CLOUD_MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
19
 
20
  # ==============================================================================
21
- # 2. ENGINE INITIALIZATION (CPU Optimized + Config Sanitizer)
22
  # ==============================================================================
23
- print("⏳ Initializing Neural Backbone...")
24
 
25
  try:
 
26
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
27
 
28
- # Load Base Model
29
  base_model = AutoModelForCausalLM.from_pretrained(
30
  BASE_MODEL_ID,
31
  torch_dtype=torch.bfloat16,
32
  device_map="cpu",
33
- trust_remote_code=True,
34
- attn_implementation="eager"
35
  )
36
 
37
- print(f"⏳ Downloading and sanitizing adapter ({ADAPTER_ID})...")
38
-
39
- # 1. Download the adapter files locally
40
- local_adapter_path = snapshot_download(repo_id=ADAPTER_ID, token=HF_TOKEN)
41
-
42
- # 2. Load the config JSON
43
- config_path = os.path.join(local_adapter_path, "adapter_config.json")
44
- with open(config_path, "r") as f:
45
- config_data = json.load(f)
46
-
47
- # 3. Remove the key that causes the crash
48
- if "alora_invocation_tokens" in config_data:
49
- print("🧹 Cleaning incompatible Unsloth config keys...")
50
- del config_data["alora_invocation_tokens"]
51
-
52
- # Save the clean config back to disk
53
- with open(config_path, "w") as f:
54
- json.dump(config_data, f, indent=2)
55
-
56
- # 4. Load the adapter from the local sanitized folder
57
- model = PeftModel.from_pretrained(base_model, local_adapter_path)
58
- model.eval()
59
  print("✅ System Online.")
60
 
61
  except Exception as e:
62
  print(f"❌ Error loading model: {e}")
63
  raise e
64
 
 
65
  hf_client = InferenceClient(model=CLOUD_MODEL_ID, token=HF_TOKEN)
66
 
67
  # ==============================================================================
@@ -70,11 +56,15 @@ hf_client = InferenceClient(model=CLOUD_MODEL_ID, token=HF_TOKEN)
70
 
71
def clean_output(text):
    """Sanitizes output to prevent chain-reaction failures."""
    # Strip the model's special end-of-sequence tokens.
    cleaned = text.replace("<|end|>", "").replace("<|endoftext|>", "")
    # Keep only the text after the final "Output:" marker, if present.
    if "Output:" in cleaned:
        cleaned = cleaned.split("Output:")[-1]
    # Retain only key/value-style lines (contain ':') that are not notes.
    kept = [
        row for row in cleaned.split('\n')
        if ":" in row and "Note" not in row
    ]
    return " ".join(kept).strip()
79
 
80
  def kernel_scout(raw_input):
@@ -100,7 +90,7 @@ RAW NOTE:
100
  max_new_tokens=256,
101
  temperature=0.1,
102
  do_sample=True,
103
- use_cache=True
104
  )
105
 
106
  text = tokenizer.batch_decode(outputs)[0]
@@ -154,11 +144,11 @@ PRIVATE_DB:
154
  with torch.no_grad():
155
  outputs = model.generate(
156
  **inputs,
157
- max_new_tokens=600,
158
  temperature=0.1,
159
  repetition_penalty=1.05,
160
  do_sample=True,
161
- use_cache=True
162
  )
163
 
164
  text = tokenizer.batch_decode(outputs)[0]
@@ -213,6 +203,8 @@ report_md = """
213
  # 🏛️ The Janus Interface: Research & Technical Analysis
214
  **Project Status:** Research Prototype v2.0 (Gold Standard)
215
 
 
 
216
  ### 1. Research Motivation: The Privacy-Utility Paradox
217
  In regulated domains (Healthcare, Legal, Finance), Generative AI adoption is stalled by a fundamental conflict:
218
  * **Utility:** Large Cloud Models (GPT-4, Claude) offer superior reasoning but require sending data off-premise.
@@ -235,12 +227,14 @@ The system utilizes a **Multi-Task Adapter** trained to switch between two disti
235
  * **Function:** A secure, offline engine that accepts the JanusScript and a Local SQL Database record.
236
  * **Output:** It merges the abstract logic with the concrete identity to generate the final, human-readable document.
237
 
 
 
238
  ### 3. Data Engineering: The "Gold Standard" Pipeline
239
- To achieve high fidelity without using private patient data, we developed a **Teacher-Student Distillation** pipeline:
240
- 1. **Source:** **MTSamples** (Open Source Medical Transcription).
241
- 2. **Distillation:** We used **Llama-3-70B** to read 4,000+ real medical notes and extract the logic into our custom `JanusScript` syntax.
242
- 3. **Synthesis:** We generated synthetic identities (Names/MRNs) using Python libraries.
243
- 4. **Alignment:** We programmatically constructed the "Target Output" by prepending the synthetic header to the real medical text.
244
 
245
  ### 4. Training Methodology
246
  * **Base Model:** Microsoft Phi-3.5-mini-instruct (3.8B Parameters).
@@ -248,10 +242,10 @@ To achieve high fidelity without using private patient data, we developed a **Te
248
  * **Technique:** **DoRA (Weight-Decomposed Low-Rank Adaptation)**.
249
  * *Why DoRA?* Standard LoRA struggles with strict syntax/coding tasks. DoRA updates both magnitude and direction vectors, allowing the model to learn the strict `JanusScript` grammar effectively.
250
  * **Loss Masking:** We used `train_on_responses_only`. The model was **never** trained on the input text, only on the output. This prevents the model from memorizing patient PII from the training set.
251
- * **Hyperparameters:** Rank 16, Alpha 16, Learning Rate 2e-4, 2 Epochs (306 samples).
252
 
253
  ### 5. Results & Conclusion
254
- * **Zero-Trust Validation:** The "Vault" successfully reconstructs documents using *only* the database for identity. If the DB says "Male" and the training data said "Female," the model now correctly obeys the DB.
255
  * **Semantic Expansion:** The model demonstrates the ability to take a concise code (`Dx(Pneumonia)`) and expand it into fluent medical narrative ("Patient presented with symptoms consistent with Pneumonia...").
256
  """
257
 
 
4
  import os
5
  from transformers import AutoModelForCausalLM, AutoTokenizer
6
  from peft import PeftModel
7
+ from huggingface_hub import InferenceClient
8
 
9
  # ==============================================================================
10
  # 1. CONFIGURATION
11
  # ==============================================================================
12
+ # NOTE: You must set 'HF_TOKEN' in your Hugging Face Space Secrets!
13
+ HF_TOKEN = os.getenv("HF_TOKEN")
14
+
15
  PROJECT_TITLE = "The Janus Interface: Semantic Decoupling Architecture"
16
 
17
  # Models
18
+ # We use the official Microsoft repo for CPU compatibility
19
  BASE_MODEL_ID = "microsoft/Phi-3.5-mini-instruct"
20
  ADAPTER_ID = "st192011/janus-gold-lora"
21
  CLOUD_MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
22
 
23
  # ==============================================================================
24
+ # 2. ENGINE INITIALIZATION (CPU Optimized)
25
  # ==============================================================================
26
+ print("⏳ Initializing Neural Backbone (CPU Mode)...")
27
 
28
  try:
29
+ # Load Tokenizer
30
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
31
 
32
+ # Load Base Model (bfloat16 saves RAM on Free Tier Spaces)
33
  base_model = AutoModelForCausalLM.from_pretrained(
34
  BASE_MODEL_ID,
35
  torch_dtype=torch.bfloat16,
36
  device_map="cpu",
37
+ trust_remote_code=True
 
38
  )
39
 
40
+ # Load Adapter
41
+ print(f"⏳ Mounting Janus Adapter ({ADAPTER_ID})...")
42
+ model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
43
+ model.eval() # Set to inference mode
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  print("✅ System Online.")
45
 
46
  except Exception as e:
47
  print(f"❌ Error loading model: {e}")
48
  raise e
49
 
50
+ # Cloud Client
51
  hf_client = InferenceClient(model=CLOUD_MODEL_ID, token=HF_TOKEN)
52
 
53
  # ==============================================================================
 
56
 
57
def clean_output(text):
    """Sanitizes output to prevent chain-reaction failures."""
    # Strip the model's special end-of-sequence tokens.
    cleaned = text.replace("<|end|>", "").replace("<|endoftext|>", "")
    # Keep only the text after the final "Output:" marker, if present.
    if "Output:" in cleaned:
        cleaned = cleaned.split("Output:")[-1]
    # Drop note lines and conversational filler ("Here is ...");
    # everything else (protocol code or normal text) is kept.
    kept = []
    for row in cleaned.split('\n'):
        if "Note" in row or "Here is" in row:
            continue
        kept.append(row)
    return " ".join(kept).strip()
69
 
70
  def kernel_scout(raw_input):
 
90
  max_new_tokens=256,
91
  temperature=0.1,
92
  do_sample=True,
93
+ use_cache=False
94
  )
95
 
96
  text = tokenizer.batch_decode(outputs)[0]
 
144
  with torch.no_grad():
145
  outputs = model.generate(
146
  **inputs,
147
+ max_new_tokens=1024,
148
  temperature=0.1,
149
  repetition_penalty=1.05,
150
  do_sample=True,
151
+ use_cache=False
152
  )
153
 
154
  text = tokenizer.batch_decode(outputs)[0]
 
203
  # 🏛️ The Janus Interface: Research & Technical Analysis
204
  **Project Status:** Research Prototype v2.0 (Gold Standard)
205
 
206
+ ---
207
+
208
  ### 1. Research Motivation: The Privacy-Utility Paradox
209
  In regulated domains (Healthcare, Legal, Finance), Generative AI adoption is stalled by a fundamental conflict:
210
  * **Utility:** Large Cloud Models (GPT-4, Claude) offer superior reasoning but require sending data off-premise.
 
227
  * **Function:** A secure, offline engine that accepts the JanusScript and a Local SQL Database record.
228
  * **Output:** It merges the abstract logic with the concrete identity to generate the final, human-readable document.
229
 
230
+ ---
231
+
232
  ### 3. Data Engineering: The "Gold Standard" Pipeline
233
+ To achieve high fidelity without using private patient data, we developed a **Synthesized Data Pipeline**:
234
+
235
+ 1. **Synthesis:** We generated **306 high-quality clinical scenarios** using Large Language Models (LLMs).
236
+ 2. **Alignment:** Unlike previous iterations where headers were random, this dataset ensured strict mathematical alignment between the Identity Header (Age/Sex) and the Clinical Narrative.
237
+ 3. **Result:** This eliminated the "hallucination" issues seen in earlier tests where the model would confuse patient gender or age due to conflicting training signals.
238
 
239
  ### 4. Training Methodology
240
  * **Base Model:** Microsoft Phi-3.5-mini-instruct (3.8B Parameters).
 
242
  * **Technique:** **DoRA (Weight-Decomposed Low-Rank Adaptation)**.
243
  * *Why DoRA?* Standard LoRA struggles with strict syntax/coding tasks. DoRA updates both magnitude and direction vectors, allowing the model to learn the strict `JanusScript` grammar effectively.
244
  * **Loss Masking:** We used `train_on_responses_only`. The model was **never** trained on the input text, only on the output. This prevents the model from memorizing patient PII from the training set.
245
+ * **Hyperparameters:** Rank 16, Alpha 16, Learning Rate 2e-4, **2 Epochs** (approx 78 steps used for final checkpoint).
246
 
247
  ### 5. Results & Conclusion
248
+ * **Zero-Trust Validation:** The "Vault" successfully reconstructs documents using *only* the database for identity.
249
  * **Semantic Expansion:** The model demonstrates the ability to take a concise code (`Dx(Pneumonia)`) and expand it into fluent medical narrative ("Patient presented with symptoms consistent with Pneumonia...").
250
  """
251