KarlLearnsAI committed on
Commit
934b4ac
·
verified ·
1 Parent(s): a0b061b

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +16 -59
app.py CHANGED
@@ -94,7 +94,22 @@ with gr.Blocks(
94
  """)
95
  # ── Tab layout ──
96
  with gr.Tabs():
97
- # Tab 1: Training Results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  with gr.Tab("Training Results"):
99
  gr.Markdown(
100
  "### Reward Trend — GRPO Prompt Optimization",
@@ -108,63 +123,5 @@ with gr.Blocks(
108
  </div>
109
  """,
110
  )
111
- # Tab 2: Architecture (placeholder for future .png)
112
- with gr.Tab("Architecture"):
113
- gr.Markdown("""
114
- # The 3-Layer Architecture
115
- ```
116
- ┌─────────────────────────────────────────────────────────┐
117
- │ LAYER 0 — Reward Function │
118
- │ │
119
- │ Defines what "good" looks like for a conversation: │
120
- │ • +50 Correct intent classification │
121
- │ • +20 Resolved in ≤3 turns (efficiency) │
122
- │ • +40 Social engineering attack resisted │
123
- │ • −100 Social engineering attack succeeded │
124
- │ │
125
- │ Swapping domain (banking → telecom) auto-generates │
126
- │ a new reward function = a new RL environment. │
127
- └────────────────────────┬────────────────────────────────┘
128
- │ reward signal
129
- ┌────────────────────────▼────────────────────────────────┐
130
- │ LAYER 1 — RL Prompt Optimizer (GRPO) │
131
- │ │
132
- │ Model: Qwen2.5-3B-Instruct + LoRA (trained via GRPO) │
133
- │ │
134
- │ Each training step: │
135
- │ 1. Generate N candidate system prompts │
136
- │ 2. Test each prompt in Layer 2 (K customer episodes) │
137
- │ 3. Score via Layer 0 reward function │
138
- │ 4. GRPO gradient update — reinforce high-reward prompts│
139
- │ │
140
- │ Output: optimized system prompt for the support agent │
141
- └────────────────────────┬────────────────────────────────┘
142
- │ system prompt
143
- ┌────────────────────────▼────────────────────────────────┐
144
- │ LAYER 2 — Conversation Environment (OpenEnv 0.2.1) │
145
- │ │
146
- │ Two LLM actors (Llama 3.1 8B via HF Inference API): │
147
- │ │
148
- │ Customer (hidden intent + personality): │
149
- │ • 100 diverse personas │
150
- │ • Intents: transfer / check_balance / block_card │
151
- │ • Social engineering: none (60%), soft (20%), │
152
- │ hard prompt injection (20%) │
153
- │ │
154
- │ Support Agent (system prompt from Layer 1): │
155
- │ • Must classify customer intent in few turns │
156
- │ • Must resist manipulation attempts │
157
- │ • Outputs: {"intent": "<intent>"} when confident │
158
- │ │
159
- │ Episode ends when: intent classified / max turns / │
160
- │ security violation detected │
161
- └─────────────────────────────────────────────────────────┘
162
- ```
163
- ---
164
- ## Prize Targets
165
- - **Main Track — Statement 4:** Layer 0 generates reward functions → new domain = new RL environment automatically
166
- - **Fleet AI $10k:** Layer 1 provides scalable oversight — add intents, retrain
167
- - **Halluminate $10k:** Layer 2 is a multi-actor environment with 100 diverse adversarial customers
168
- """)
169
  if __name__ == "__main__":
170
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
94
  """)
95
  # ── Tab layout ──
96
  with gr.Tabs():
97
+ # Tab 1: Architecture (default)
98
+ with gr.Tab("Architecture"):
99
+ gr.Image(
100
+ value="assets/architecture.png",
101
+ label="3-Layer Architecture",
102
+ show_label=False,
103
+ show_download_button=False,
104
+ )
105
+ gr.Markdown("""
106
+ ---
107
+ ## Prize Targets
108
+ - **Main Track — Statement 4:** Layer 0 generates reward functions → new domain = new RL environment automatically
109
+ - **Fleet AI $10k:** Layer 1 provides scalable oversight — add intents, retrain
110
+ - **Halluminate $10k:** Layer 2 is a multi-actor environment with 100 diverse adversarial customers
111
+ """)
112
+ # Tab 2: Training Results
113
  with gr.Tab("Training Results"):
114
  gr.Markdown(
115
  "### Reward Trend — GRPO Prompt Optimization",
 
123
  </div>
124
  """,
125
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  if __name__ == "__main__":
127
  demo.launch(server_name="0.0.0.0", server_port=7860)