Text Generation
Transformers
Safetensors
English
qwen2
Generated from Trainer
grpo
trl
security
smart-contracts
solidity
audit
web3
conversational
text-generation-inference
Instructions to use oxdev/security-auditor-grpo with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use oxdev/security-auditor-grpo with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("text-generation", model="oxdev/security-auditor-grpo") messages = [ {"role": "user", "content": "Who are you?"}, ] pipe(messages)# Load model directly from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained("oxdev/security-auditor-grpo") model = AutoModelForCausalLM.from_pretrained("oxdev/security-auditor-grpo") messages = [ {"role": "user", "content": "Who are you?"}, ] inputs = tokenizer.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use oxdev/security-auditor-grpo with vLLM:
Install from pip and serve the model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "oxdev/security-auditor-grpo" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "oxdev/security-auditor-grpo", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker
docker model run hf.co/oxdev/security-auditor-grpo
- SGLang
How to use oxdev/security-auditor-grpo with SGLang:
Install from pip and serve the model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "oxdev/security-auditor-grpo" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "oxdev/security-auditor-grpo", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "oxdev/security-auditor-grpo" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "oxdev/security-auditor-grpo", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }' - Docker Model Runner
How to use oxdev/security-auditor-grpo with Docker Model Runner:
docker model run hf.co/oxdev/security-auditor-grpo
v2: 5K subset for A10G, fix escaping
Browse files- train_grpo_v2.py +13 -15
train_grpo_v2.py
CHANGED
|
@@ -246,34 +246,32 @@ def main():
|
|
| 246 |
sev_dist = Counter(dataset['severity'])
|
| 247 |
logger.info(f"Severity distribution: {dict(sev_dist)}")
|
| 248 |
|
| 249 |
-
# Subsample
|
| 250 |
-
#
|
| 251 |
-
logger.info("Selecting high-quality training subset...")
|
| 252 |
indices = []
|
|
|
|
| 253 |
|
| 254 |
-
# Priority 1: HIGH severity with code (most valuable)
|
| 255 |
for i, row in enumerate(dataset):
|
| 256 |
if row['severity'] in ('high', 'critical') and row['has_code']:
|
| 257 |
indices.append(i)
|
|
|
|
| 258 |
logger.info(f" HIGH+CRITICAL with code: {len(indices)}")
|
| 259 |
|
| 260 |
-
# Priority 2:
|
| 261 |
for i, row in enumerate(dataset):
|
| 262 |
-
if row['
|
| 263 |
-
indices.append(i)
|
| 264 |
-
logger.info(f" + MEDIUM with code: {len(indices)}")
|
| 265 |
-
|
| 266 |
-
# Priority 3: Any with PoC reference
|
| 267 |
-
for i, row in enumerate(dataset):
|
| 268 |
-
if row['has_poc'] and i not in set(indices):
|
| 269 |
indices.append(i)
|
|
|
|
| 270 |
logger.info(f" + Has PoC: {len(indices)}")
|
| 271 |
|
| 272 |
-
# Priority
|
| 273 |
for i, row in enumerate(dataset):
|
| 274 |
-
if row['severity']
|
| 275 |
indices.append(i)
|
| 276 |
-
|
|
|
|
| 277 |
break
|
| 278 |
logger.info(f" Final subset: {len(indices)} samples")
|
| 279 |
|
|
|
|
| 246 |
sev_dist = Counter(dataset['severity'])
|
| 247 |
logger.info(f"Severity distribution: {dict(sev_dist)}")
|
| 248 |
|
| 249 |
+
# Subsample — 5K highest-value samples for A10G (fits in ~6hrs)
|
| 250 |
+
# Focus on HIGH+CRITICAL with code — most valuable training signal
|
| 251 |
+
logger.info("Selecting high-quality training subset (5K for A10G)...")
|
| 252 |
indices = []
|
| 253 |
+
idx_set = set()
|
| 254 |
|
| 255 |
+
# Priority 1: HIGH+CRITICAL severity with code (most valuable)
|
| 256 |
for i, row in enumerate(dataset):
|
| 257 |
if row['severity'] in ('high', 'critical') and row['has_code']:
|
| 258 |
indices.append(i)
|
| 259 |
+
idx_set.add(i)
|
| 260 |
logger.info(f" HIGH+CRITICAL with code: {len(indices)}")
|
| 261 |
|
| 262 |
+
# Priority 2: Any with PoC reference
|
| 263 |
for i, row in enumerate(dataset):
|
| 264 |
+
if row['has_poc'] and i not in idx_set:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
indices.append(i)
|
| 266 |
+
idx_set.add(i)
|
| 267 |
logger.info(f" + Has PoC: {len(indices)}")
|
| 268 |
|
| 269 |
+
# Priority 3: MEDIUM with code (fill to 5K cap)
|
| 270 |
for i, row in enumerate(dataset):
|
| 271 |
+
if row['severity'] == 'medium' and row['has_code'] and i not in idx_set:
|
| 272 |
indices.append(i)
|
| 273 |
+
idx_set.add(i)
|
| 274 |
+
if len(indices) >= 5000:
|
| 275 |
break
|
| 276 |
logger.info(f" Final subset: {len(indices)} samples")
|
| 277 |
|