File size: 1,737 Bytes
3818a51
ea9eade
af4e958
 
 
 
ea9eade
af4e958
3818a51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af4e958
 
 
 
3818a51
ea9eade
af4e958
3818a51
af4e958
 
 
 
3818a51
ea9eade
af4e958
3818a51
af4e958
 
 
 
3818a51
ea9eade
af4e958
3818a51
 
ea9eade
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Project identity for this environment manifest.
name: 'fraudshield'
# Quoted so the version is always read as a string, never as a number.
version: '0.2.0'
# Literal block scalar: line breaks are kept and one trailing newline remains.
description: |
  Production-grade OpenEnv environment for e-commerce fraud detection.
  Simulates marketplace fraud review with 3 difficulty levels (easy/medium/hard),
  deterministic graders, business-cost-sensitive rewards, and reproducible baselines.
# Author contact details.
author: 'Devika J'
email: 'devikaj2005@gmail.com'

# Class/module bindings for the environment and its action, observation,
# and reward types. Each stanza names a class and the module that holds it.
# NOTE(review): presumably resolved as import targets by the framework
# loader - confirm against the consumer of this manifest.

# Environment implementation.
environment:
  class: 'FraudShieldEnvironment'
  module: 'fraudshield_env'

# Action type; shares the `models` module with observation and reward.
action:
  class: 'FraudCheckAction'
  module: 'models'

# Observation type.
observation:
  class: 'FraudCheckObservation'
  module: 'models'

# Reward type.
reward:
  class: 'Reward'
  module: 'models'

# Task catalogue: three entries listed from easiest to hardest. Across the
# list num_transactions rises (24, 36, 48) while baseline_score falls.
# NOTE(review): baseline_score is presumably the score of the reproducible
# baseline mentioned in the top-level description - confirm.
# Each description is a literal block scalar, so its line breaks are kept.
tasks:
  # Strong single-transaction indicators.
  - name: 'easy'
    difficulty: 'easy'
    num_transactions: 24
    baseline_score: 1.0000
    description: |
      Clear-cut fraud detection with strong single-transaction indicators.
      Agents should achieve 95%+ accuracy by identifying obvious red flags.
      Difficulty: Foundation level - rewards strong pattern recognition.

  # Mixed signals; no single feature is decisive.
  - name: 'medium'
    difficulty: 'medium'
    num_transactions: 36
    baseline_score: 0.8773
    description: |
      Mixed-signal reviews where confidence calibration and risk tradeoffs matter.
      No single feature is decisive; agents must weigh multiple indicators.
      Difficulty: Intermediate - rewards balanced decision-making.

  # Coordinated fraud rings and flash-sale edge cases.
  - name: 'hard'
    difficulty: 'hard'
    num_transactions: 48
    baseline_score: 0.7206
    description: |
      Coordinated fraud rings and legitimate flash-sale edge cases with overlapping signals.
      Complex patterns require network-level analysis and tolerance for ambiguity.
      Difficulty: Advanced - rewards sophisticated reasoning.

# Descriptive metadata about this manifest.
metadata:
  # Quoted so the value stays a string instead of being parsed as a date.
  created: '2026-03-30'
  framework: 'openenv'
  license: 'MIT'
  # Free-form labels for this environment.
  tags:
    - 'openenv'
    - 'fraud-detection'
    - 'e-commerce'
    - 'agent-evaluation'