openfree committed on
Commit
c581326
·
verified ·
1 Parent(s): 6947b37

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +294 -57
app.py CHANGED
@@ -215,7 +215,8 @@ class ArenaDatabase:
215
  # Always initialize local database as fallback
216
  self.init_database()
217
 
218
- # Sync from HF if available
 
219
  if self.use_hf:
220
  self._sync_from_hf()
221
 
@@ -234,46 +235,159 @@ class ArenaDatabase:
234
  print(f"Dataset repo creation note: {e}")
235
 
236
  def _sync_from_hf(self):
237
- """Sync data from Hugging Face to local database"""
238
  try:
239
- # Try to load existing data from HF
240
- dataset = load_dataset(self.hf_repo_id, split="train", use_auth_token=self.hf_token)
241
 
242
- conn = sqlite3.connect(self.db_path)
243
- cursor = conn.cursor()
244
-
245
- # Load battles
246
- if "battles" in dataset.column_names:
247
- battles_df = dataset.to_pandas()
248
- battles_df.to_sql('battles_temp', conn, if_exists='replace', index=False)
249
 
250
- # Merge with existing battles (avoid duplicates)
251
- cursor.execute('''
252
- INSERT OR IGNORE INTO battles
253
- SELECT * FROM battles_temp
254
- ''')
255
- cursor.execute('DROP TABLE battles_temp')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
 
257
- # Load model stats
258
  try:
259
- stats_dataset = load_dataset(self.hf_repo_id, split="stats", use_auth_token=self.hf_token)
 
 
 
 
 
260
  if stats_dataset and len(stats_dataset) > 0:
 
 
 
261
  stats_df = stats_dataset.to_pandas()
262
- stats_df.to_sql('model_stats_temp', conn, if_exists='replace', index=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
263
 
264
- # Update stats with latest from HF
265
- cursor.execute('DELETE FROM model_stats')
266
- cursor.execute('INSERT INTO model_stats SELECT * FROM model_stats_temp')
267
- cursor.execute('DROP TABLE model_stats_temp')
268
- except:
269
- pass # Stats split might not exist yet
270
 
271
- conn.commit()
272
- conn.close()
273
- print("✅ Synced data from Hugging Face")
274
 
275
  except Exception as e:
276
- print(f"Note: Could not sync from HF (might be first run): {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
 
278
  def _sync_to_hf(self):
279
  """Sync local database to Hugging Face"""
@@ -293,6 +407,7 @@ class ArenaDatabase:
293
  token=self.hf_token,
294
  private=True
295
  )
 
296
 
297
  # Export model stats
298
  stats_df = pd.read_sql_query("SELECT * FROM model_stats", conn)
@@ -304,12 +419,12 @@ class ArenaDatabase:
304
  token=self.hf_token,
305
  private=True
306
  )
 
307
 
308
  conn.close()
309
- print("✅ Synced data to Hugging Face")
310
 
311
  except Exception as e:
312
- print(f"Warning: Could not sync to HF: {e}")
313
 
314
  def init_database(self):
315
  """Initialize SQLite database"""
@@ -589,23 +704,19 @@ class LLMInterface:
589
  full_response = self._get_jetxa_response(full_prompt)
590
 
591
  if full_response:
592
- # Format jetXA response with proper spacing
593
  formatted_response = self._format_jetxa_response(full_response)
594
 
595
- # Simulate streaming word by word for jetXA for smoother effect
596
- words = formatted_response.split()
597
  accumulated = ""
 
598
 
599
- # Stream words in small batches for natural effect
600
- batch_size = 2 # Stream 2 words at a time
601
- for i in range(0, len(words), batch_size):
602
- batch = words[i:i+batch_size]
603
- for word in batch:
604
- if accumulated:
605
- accumulated += " "
606
- accumulated += word
607
- yield accumulated # Yield accumulated text after each batch
608
- time.sleep(0.03) # Small delay between batches
609
  else:
610
  # Use fallback if jetXA fails
611
  fallback = self._generate_fallback(model, prompt, language)
@@ -822,7 +933,7 @@ class LLMInterface:
822
  yield fallback
823
 
824
  def _get_jetxa_response(self, prompt: str) -> str:
825
- """Get complete response from jetXA"""
826
  if not self.gradio_client:
827
  return ""
828
 
@@ -838,37 +949,163 @@ class LLMInterface:
838
 
839
  response_text = ""
840
 
 
 
 
 
 
 
 
 
 
 
 
841
  if result and isinstance(result, (tuple, list)) and len(result) >= 1:
 
 
 
842
  chat_history = result[0]
843
 
844
  if isinstance(chat_history, list) and len(chat_history) > 0:
 
845
  for msg in reversed(chat_history):
846
  if isinstance(msg, dict):
847
- content = msg.get('content', '')
848
- if content:
 
849
  response_text = str(content)
 
850
  break
851
  elif isinstance(msg, (list, tuple)) and len(msg) >= 2:
852
- if msg[1]:
 
853
  response_text = str(msg[1])
 
854
  break
 
 
 
 
855
 
 
856
  if not response_text:
857
- for i in range(1, min(3, len(result))):
858
- if result[i] and isinstance(result[i], str) and result[i].strip():
859
- response_text = result[i]
860
- break
 
 
 
 
 
 
 
 
 
861
 
862
  if response_text:
863
- # Clean up any potential formatting issues
864
- response_text = self._clean_markdown_response(response_text)
865
-
866
- return response_text
 
 
867
 
868
  except Exception as e:
869
  print(f"jetXA response error: {e}")
 
 
870
  return ""
871
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
872
  def _clean_markdown_response(self, text: str) -> str:
873
  """Clean and fix common markdown formatting issues"""
874
  # Remove any duplicate markers or broken formatting
 
215
  # Always initialize local database as fallback
216
  self.init_database()
217
 
218
+ # IMPORTANT: Sync from HF AFTER initializing local DB
219
+ # This will load all historical data
220
  if self.use_hf:
221
  self._sync_from_hf()
222
 
 
235
  print(f"Dataset repo creation note: {e}")
236
 
237
  def _sync_from_hf(self):
238
+ """Sync data from Hugging Face to local database - PRESERVES existing data"""
239
  try:
240
+ print("📥 Loading historical data from Hugging Face...")
 
241
 
242
+ # Try to load existing battles data
243
+ try:
244
+ battles_dataset = load_dataset(
245
+ self.hf_repo_id,
246
+ split="train",
247
+ use_auth_token=self.hf_token
248
+ )
249
 
250
+ if battles_dataset and len(battles_dataset) > 0:
251
+ conn = sqlite3.connect(self.db_path)
252
+ cursor = conn.cursor()
253
+
254
+ # Convert to DataFrame
255
+ battles_df = battles_dataset.to_pandas()
256
+ print(f" Found {len(battles_df)} historical battles")
257
+
258
+ # Insert battles one by one, ignoring duplicates
259
+ for _, row in battles_df.iterrows():
260
+ try:
261
+ cursor.execute('''
262
+ INSERT OR IGNORE INTO battles
263
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
264
+ ''', (
265
+ row.get('id'),
266
+ row.get('prompt_id'),
267
+ row.get('prompt_text'),
268
+ row.get('category'),
269
+ row.get('model_a'),
270
+ row.get('model_b'),
271
+ row.get('response_a'),
272
+ row.get('response_b'),
273
+ row.get('winner'),
274
+ row.get('voter_id'),
275
+ row.get('timestamp'),
276
+ row.get('custom_prompt', 0),
277
+ row.get('language', 'en')
278
+ ))
279
+ except Exception as e:
280
+ print(f" Skipping battle {row.get('id')}: {e}")
281
+ continue
282
+
283
+ conn.commit()
284
+ conn.close()
285
+ print(f" ✅ Loaded {len(battles_df)} battles from HF")
286
+ except Exception as e:
287
+ print(f" Note: No battles data found (might be first run): {e}")
288
 
289
+ # Try to load model stats
290
  try:
291
+ stats_dataset = load_dataset(
292
+ self.hf_repo_id,
293
+ split="stats",
294
+ use_auth_token=self.hf_token
295
+ )
296
+
297
  if stats_dataset and len(stats_dataset) > 0:
298
+ conn = sqlite3.connect(self.db_path)
299
+ cursor = conn.cursor()
300
+
301
  stats_df = stats_dataset.to_pandas()
302
+ print(f" Found stats for {len(stats_df)} models")
303
+
304
+ # Update model stats with the latest from HF
305
+ for _, row in stats_df.iterrows():
306
+ cursor.execute('''
307
+ INSERT OR REPLACE INTO model_stats
308
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
309
+ ''', (
310
+ row.get('model_name'),
311
+ row.get('overall_score', 5.0),
312
+ row.get('storytelling_score', 5.0),
313
+ row.get('innovation_score', 5.0),
314
+ row.get('business_score', 5.0),
315
+ row.get('total_battles', 0),
316
+ row.get('wins', 0),
317
+ row.get('losses', 0),
318
+ row.get('elo_rating', 1500)
319
+ ))
320
 
321
+ conn.commit()
322
+ conn.close()
323
+ print(f" ✅ Loaded stats for {len(stats_df)} models from HF")
324
+ except Exception as e:
325
+ print(f" Note: No stats data found: {e}")
 
326
 
327
+ # Recalculate stats based on loaded battles
328
+ self._recalculate_stats_from_battles()
 
329
 
330
  except Exception as e:
331
+ print(f"⚠️ Could not sync from HF (might be first run): {e}")
332
+
333
def _recalculate_stats_from_battles(self):
    """Recalculate model stats from battle history.

    Resets the ``model_stats`` rows for "GPT-5" and "jetXA" to their
    defaults, then replays every battle that has a non-empty winner in
    timestamp order, updating battle counts, category scores and ELO
    ratings. All changes are committed together at the end; if an
    unexpected error is raised, the connection is closed without
    committing and the exception propagates to the caller.
    """
    conn = sqlite3.connect(self.db_path)
    try:
        cursor = conn.cursor()

        # Reset stats for GPT-5 and jetXA
        for model in ("GPT-5", "jetXA"):
            cursor.execute('''
                INSERT OR REPLACE INTO model_stats
                (model_name, overall_score, storytelling_score, innovation_score,
                 business_score, total_battles, wins, losses, elo_rating)
                VALUES (?, 5.0, 5.0, 5.0, 5.0, 0, 0, 0, 1500)
            ''', (model,))

        # Get all battles with winners, oldest first so that the
        # order-dependent rating updates are replayed chronologically.
        cursor.execute('''
            SELECT category, model_a, model_b, winner
            FROM battles
            WHERE winner IS NOT NULL AND winner != ''
            ORDER BY timestamp ASC
        ''')

        battles = cursor.fetchall()
        print(f" Recalculating stats from {len(battles)} completed battles...")

        for category, model_a, model_b, winner in battles:
            # A winner that is neither participant (e.g. a tie marker)
            # cannot be attributed; without this guard model_a would be
            # charged with a loss it never had.
            if winner not in (model_a, model_b):
                print(f" Skipping battle with unrecognized winner: {winner!r}")
                continue

            loser = model_b if winner == model_a else model_a

            # Update battle counts
            cursor.execute('''
                UPDATE model_stats
                SET total_battles = total_battles + 1, wins = wins + 1
                WHERE model_name = ?
            ''', (winner,))

            cursor.execute('''
                UPDATE model_stats
                SET total_battles = total_battles + 1, losses = losses + 1
                WHERE model_name = ?
            ''', (loser,))

            # Update category scores and ELO; a bad category value only
            # skips the score/rating update for that battle.
            try:
                cat_enum = Category(category)
                self._update_category_scores(cursor, winner, cat_enum, True)
                self._update_category_scores(cursor, loser, cat_enum, False)

                # Update ELO
                self._update_elo_ratings(cursor, winner, loser)
            except Exception as e:
                print(f" Error processing battle: {e}")
                continue

        conn.commit()
    finally:
        # Always release the connection so a failure cannot leave the
        # database held by a leaked handle.
        conn.close()

    print(" ✅ Stats recalculated successfully")
391
 
392
  def _sync_to_hf(self):
393
  """Sync local database to Hugging Face"""
 
407
  token=self.hf_token,
408
  private=True
409
  )
410
+ print(f" 📤 Synced {len(battles_df)} battles to HF")
411
 
412
  # Export model stats
413
  stats_df = pd.read_sql_query("SELECT * FROM model_stats", conn)
 
419
  token=self.hf_token,
420
  private=True
421
  )
422
+ print(f" 📤 Synced stats for {len(stats_df)} models to HF")
423
 
424
  conn.close()
 
425
 
426
  except Exception as e:
427
+ print(f"⚠️ Warning: Could not sync to HF: {e}")
428
 
429
  def init_database(self):
430
  """Initialize SQLite database"""
 
704
  full_response = self._get_jetxa_response(full_prompt)
705
 
706
  if full_response:
707
+ # Format jetXA response AFTER getting it
708
  formatted_response = self._format_jetxa_response(full_response)
709
 
710
+ # Stream the formatted response character by character for smooth effect
711
+ # This preserves all formatting including line breaks
712
  accumulated = ""
713
+ chunk_size = 5 # Characters at a time
714
 
715
+ for i in range(0, len(formatted_response), chunk_size):
716
+ chunk = formatted_response[i:i+chunk_size]
717
+ accumulated += chunk
718
+ yield accumulated
719
+ time.sleep(0.01) # Small delay for streaming effect
 
 
 
 
 
720
  else:
721
  # Use fallback if jetXA fails
722
  fallback = self._generate_fallback(model, prompt, language)
 
933
  yield fallback
934
 
935
  def _get_jetxa_response(self, prompt: str) -> str:
936
+ """Get complete response from jetXA with improved parsing"""
937
  if not self.gradio_client:
938
  return ""
939
 
 
949
 
950
  response_text = ""
951
 
952
+ # Debug: Print the result structure
953
+ print(f"jetXA result type: {type(result)}")
954
+ if isinstance(result, (tuple, list)):
955
+ print(f"jetXA result length: {len(result)}")
956
+ for i, item in enumerate(result[:3]): # Print first 3 items
957
+ print(f" Item {i} type: {type(item)}")
958
+ if isinstance(item, str):
959
+ print(f" String preview: {item[:100]}...")
960
+ elif isinstance(item, list) and len(item) > 0:
961
+ print(f" List length: {len(item)}")
962
+
963
  if result and isinstance(result, (tuple, list)) and len(result) >= 1:
964
+ # Try multiple extraction methods
965
+
966
+ # Method 1: Check if first element is chat history
967
  chat_history = result[0]
968
 
969
  if isinstance(chat_history, list) and len(chat_history) > 0:
970
+ # Look for the last assistant message
971
  for msg in reversed(chat_history):
972
  if isinstance(msg, dict):
973
+ # Check for 'content' or 'message' key
974
+ content = msg.get('content') or msg.get('message') or msg.get('text', '')
975
+ if content and str(content).strip():
976
  response_text = str(content)
977
+ print(f" Found response in dict format")
978
  break
979
  elif isinstance(msg, (list, tuple)) and len(msg) >= 2:
980
+ # Format: [user_msg, assistant_msg]
981
+ if msg[1] and str(msg[1]).strip():
982
  response_text = str(msg[1])
983
+ print(f" Found response in tuple format")
984
  break
985
+ elif isinstance(msg, str) and msg.strip():
986
+ response_text = msg
987
+ print(f" Found response as string")
988
+ break
989
 
990
+ # Method 2: If no response yet, check other indices
991
  if not response_text:
992
+ for i in range(1, min(4, len(result))):
993
+ if result[i]:
994
+ if isinstance(result[i], str) and result[i].strip():
995
+ response_text = result[i]
996
+ print(f" Found response at index {i}")
997
+ break
998
+ elif isinstance(result[i], dict):
999
+ # Try to extract from dict
1000
+ for key in ['content', 'message', 'text', 'response']:
1001
+ if key in result[i] and result[i][key]:
1002
+ response_text = str(result[i][key])
1003
+ print(f" Found response in dict at index {i}")
1004
+ break
1005
 
1006
  if response_text:
1007
+ print(f" Response length: {len(response_text)} chars")
1008
+ # DO NOT clean or modify the response here - preserve original formatting
1009
+ return response_text
1010
+ else:
1011
+ print(f" No response text found in result")
1012
+ return ""
1013
 
1014
  except Exception as e:
1015
  print(f"jetXA response error: {e}")
1016
+ import traceback
1017
+ traceback.print_exc()
1018
  return ""
1019
 
1020
def _format_jetxa_response(self, text: str) -> str:
    """Format jetXA response with proper spacing and line breaks for better readability.

    The text is processed line by line:

    * Fenced code blocks (``` or ~~~) are passed through untouched so
      that code indentation and ``#`` comments are not reformatted.
    * Headers get a blank line before and after.
    * The first item of a list gets a blank line before it; list items
      keep their leading indentation so nested lists survive.
    * Blockquotes get a blank line before and after.
    * Lines containing ``|`` are treated as table rows and kept adjacent.
    * A regular line that follows a finished sentence (ending in ``.``,
      ``!``, ``?`` or ``:**``) starts a new paragraph.

    Outside code fences, consecutive blank lines are collapsed to one.

    Args:
        text: Raw response text; may be empty or ``None``.

    Returns:
        The reformatted text without leading/trailing newlines, or
        ``text`` unchanged when it is falsy.
    """
    if not text:
        return text

    # One definition of "list item", shared by every check below. The
    # marker must be followed by a space, so bold text such as
    # "**Label:**" is not mistaken for a "*" bullet.
    list_item = re.compile(r'(?:[-*]|\d+\.) ')
    # The Korean sentence endings handled previously ('다.', '요.', ...)
    # all end with '.', so '.' already covers them.
    sentence_endings = ('.', '!', '?', ':**')

    lines = text.split('\n')
    formatted_lines = []
    fence = None  # marker ('```' or '~~~') of the currently open code fence

    def add_blank():
        # Append a blank line unless it would lead the output or double up.
        if formatted_lines and formatted_lines[-1] != '':
            formatted_lines.append('')

    for i, line in enumerate(lines):
        line_stripped = line.strip()

        # Inside a code fence: copy verbatim until the closing marker.
        if fence is not None:
            formatted_lines.append(line.rstrip())
            if line_stripped.startswith(fence):
                fence = None
            continue

        if line_stripped.startswith(('```', '~~~')):
            marker = line_stripped[:3]
            # A backtick fence may not contain further backticks after
            # the opening run; that would be inline code, not a fence.
            if marker[0] == '~' or '`' not in line_stripped.lstrip('`'):
                fence = marker
                formatted_lines.append(line.rstrip())
                continue

        # Keep empty lines (collapsed to a single blank line)
        if not line_stripped:
            add_blank()
            continue

        previous = formatted_lines[-1] if formatted_lines else ''

        # Headers - add spacing before and after
        if line_stripped.startswith('#'):
            add_blank()
            formatted_lines.append(line_stripped)
            formatted_lines.append('')

        # Lists - blank line before the first item, indentation preserved
        elif list_item.match(line_stripped):
            if previous and not list_item.match(previous.lstrip()):
                add_blank()
            formatted_lines.append(line.rstrip())

        # Blockquotes - spacing around the quoted block
        elif line_stripped.startswith('>'):
            add_blank()
            formatted_lines.append(line_stripped)
            if i < len(lines) - 1 and not lines[i + 1].strip().startswith('>'):
                formatted_lines.append('')

        # Tables - rows must stay adjacent
        elif '|' in line_stripped:
            formatted_lines.append(line_stripped)

        # Regular text - paragraph break after a complete sentence
        else:
            if (previous
                    and not list_item.match(previous.lstrip())
                    and previous.endswith(sentence_endings)):
                add_blank()
            formatted_lines.append(line_stripped)

    return '\n'.join(formatted_lines).strip('\n')
1108
+
1109
  def _clean_markdown_response(self, text: str) -> str:
1110
  """Clean and fix common markdown formatting issues"""
1111
  # Remove any duplicate markers or broken formatting