davda54 committed on
Commit
07857f0
·
verified ·
1 Parent(s): cb16510

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -4
app.py CHANGED
@@ -249,8 +249,27 @@ def load_dataset_samples():
249
  "model_b": model_b,
250
  "dataset": item.get("dataset", "unknown")
251
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
 
253
- return pairwise_samples
254
 
255
  except Exception as e:
256
  print(f"Error loading dataset: {e}")
@@ -267,7 +286,7 @@ def load_dataset_samples():
267
  "model_b": "modelB",
268
  "dataset": "test"
269
  }
270
- ]
271
 
272
 
273
  def swap_sample(sample):
@@ -283,7 +302,7 @@ def swap_sample(sample):
283
  }
284
 
285
  # Load dataset on startup
286
- DATASET_SAMPLES = load_dataset_samples()
287
 
288
  class AnnotationManager:
289
  def __init__(self):
@@ -301,7 +320,7 @@ class AnnotationManager:
301
 
302
  def get_user_seed(self, user_id: str) -> int:
303
  """Generate consistent seed for user"""
304
- return int(hashlib.md5(user_id.encode()).hexdigest(), 16) % 10000
305
 
306
  def get_user_samples(self, user_id: str) -> List[Dict]:
307
  """Get shuffled samples for user based on their ID"""
@@ -312,6 +331,7 @@ class AnnotationManager:
312
  sample if random.Random(seed + i).randint(0, 1) == 0 else swap_sample(sample)
313
  for i, sample in enumerate(samples)
314
  ]
 
315
  return samples
316
 
317
  def get_next_sample(self, user_id: str) -> Tuple[Dict, int, int]:
 
249
  "model_b": model_b,
250
  "dataset": item.get("dataset", "unknown")
251
  })
252
+
253
+ extra_dataset = load_dataset("ltg/fluency-generations", split="train_extra", token=HF_TOKEN)
254
+ extra_pairwise_samples = []
255
+ for i, item in enumerate(dataset):
256
+ sample_id = item["sample_id"]
257
+ prompt = item["prompt"]
258
+ responses = item["responses"]
259
+ model_a, model_b = MODEL_PAIRS[i]
260
+ model_a, model_b = model_a, model_b if i % 2 == 0 else model_b, model_a
261
+ extra_pairwise_samples.append({
262
+ "id": f"{sample_id}_{model_a}_vs_{model_b}",
263
+ "original_id": sample_id,
264
+ "prompt": prompt,
265
+ "response_a": responses[model_a],
266
+ "response_b": responses[model_b],
267
+ "model_a": model_a,
268
+ "model_b": model_b,
269
+ "dataset": item.get("dataset", "unknown")
270
+ })
271
 
272
+ return pairwise_samples, extra_pairwise_samples
273
 
274
  except Exception as e:
275
  print(f"Error loading dataset: {e}")
 
286
  "model_b": "modelB",
287
  "dataset": "test"
288
  }
289
+ ], []
290
 
291
 
292
  def swap_sample(sample):
 
302
  }
303
 
304
  # Load dataset on startup
305
+ DATASET_SAMPLES, EXTRA_DATASET_SAMPLES = load_dataset_samples()
306
 
307
  class AnnotationManager:
308
  def __init__(self):
 
320
 
321
  def get_user_seed(self, user_id: str) -> int:
322
  """Generate consistent seed for user"""
323
+ return int(hashlib.md5(user_id.encode()).hexdigest(), 16)
324
 
325
  def get_user_samples(self, user_id: str) -> List[Dict]:
326
  """Get shuffled samples for user based on their ID"""
 
331
  sample if random.Random(seed + i).randint(0, 1) == 0 else swap_sample(sample)
332
  for i, sample in enumerate(samples)
333
  ]
334
+ samples = EXTRA_DATASET_SAMPLES.copy() + samples
335
  return samples
336
 
337
  def get_next_sample(self, user_id: str) -> Tuple[Dict, int, int]: