Yusuke710 committed on
Commit
e28bc31
·
verified ·
1 Parent(s): 7524f15

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. .gitignore +3 -1
  2. .gradio/certificate.pem +31 -0
  3. chatUI.py +264 -23
.gitignore CHANGED
@@ -162,4 +162,6 @@ cython_debug/
162
  #.idea/
163
 
164
  character_config.yaml
165
- sample_output.txt
 
 
 
162
  #.idea/
163
 
164
  character_config.yaml
165
+ sample_output.txt
166
+ data_collection/
167
+ data
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
chatUI.py CHANGED
@@ -3,8 +3,8 @@ import yaml
3
  import os
4
  import datetime
5
 
6
- from llm import AVAILABLE_LLMS, create_client, get_response_from_llm
7
- from character import load_character_config, get_character_response
8
 
9
  # Directory containing character YAML files
10
  CHARACTER_DIR = "characters"
@@ -198,28 +198,33 @@ class ChatApp:
198
  def chat(self, user_input, chat_history, selected_llm):
199
  """Handle the chat interaction."""
200
  if not user_input:
201
- return gr.update(), chat_history
202
 
203
  if self.character_config is None:
204
- return gr.update(value="No character selected."), chat_history
205
 
206
  # Set the API key based on the selected LLM
207
  api_key = self.api_key_dict.get(selected_llm, None)
208
  if api_key:
209
  self.set_environment_api_key(selected_llm, api_key)
210
  else:
211
- return gr.update(value="Please set the API key for the selected LLM."), chat_history
212
 
213
  # Get the character's response using the provided function
214
  try:
215
  response_content = get_character_response(user_input, self.character_config, llm_model=selected_llm)
 
 
 
 
 
 
 
 
 
216
  except Exception as e:
217
- return gr.update(value=f"Error during LLM processing: {e}"), chat_history
218
-
219
- # Update chat history
220
- chat_history.append((user_input, response_content))
221
-
222
- return "", chat_history
223
 
224
  def set_environment_api_key(self, llm, api_key):
225
  """Set the environment variable for the API key based on the LLM."""
@@ -231,21 +236,146 @@ class ChatApp:
231
  os.environ["OPENROUTER_API_KEY"] = api_key
232
 
233
  def save_conversation(self, chat_history):
234
- """Save the conversation to a file in the /conversations directory."""
235
- timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
236
- filename = f"conversation_{timestamp}.txt"
237
- filepath = os.path.join(CONVERSATION_DIR, filename)
 
 
 
238
  character_name = self.character_config['character']['name'] if self.character_config else "AI"
239
- with open(filepath, 'w') as f:
240
- for idx, (user_msg, ai_msg) in enumerate(chat_history):
241
- f.write(f"User: {user_msg}\n")
242
- f.write(f"{character_name}: {ai_msg}\n\n")
243
- return f"Conversation saved as {filename} in /{CONVERSATION_DIR} directory."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
245
  def new_conversation(self):
246
- """Start a new conversation by resetting chat history."""
247
  return [], []
248
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  # Instantiate the ChatApp
250
  app = ChatApp()
251
 
@@ -269,7 +399,6 @@ with gr.Blocks() as demo:
269
  character_dropdown = gr.Dropdown(
270
  label="Select Character", choices=["New Character"] + character_names, value=character_names[0] if character_names else "New Character"
271
  )
272
-
273
  # Character Details
274
  with gr.Accordion("Character Details", open=True):
275
  name_input = gr.Textbox(label="Name")
@@ -320,7 +449,7 @@ with gr.Blocks() as demo:
320
 
321
  with gr.Tab("Chat"):
322
  gr.Markdown("## Chat Interface")
323
- chatbot = gr.Chatbot()
324
  user_input = gr.Textbox(label="Your Message:", placeholder="Type your message here...")
325
  send_button = gr.Button("Send")
326
 
@@ -347,7 +476,119 @@ with gr.Blocks() as demo:
347
  save_status = gr.Textbox(label="Save Status", interactive=False)
348
  save_button.click(app.save_conversation, inputs=[chatbot], outputs=save_status)
349
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
 
 
 
 
 
 
351
 
352
  # Initialize UI components with default character data
353
  if app.character_config:
 
3
  import os
4
  import datetime
5
 
6
+ from llm import AVAILABLE_LLMS, create_client, get_response_from_llm, get_batch_responses_from_llm
7
+ from character import load_character_config, get_character_response, build_prompt, build_system_prompt
8
 
9
  # Directory containing character YAML files
10
  CHARACTER_DIR = "characters"
 
198
  def chat(self, user_input, chat_history, selected_llm):
199
  """Handle the chat interaction."""
200
  if not user_input:
201
+ return "", []
202
 
203
  if self.character_config is None:
204
+ return "", [{"role": "assistant", "content": "No character selected."}]
205
 
206
  # Set the API key based on the selected LLM
207
  api_key = self.api_key_dict.get(selected_llm, None)
208
  if api_key:
209
  self.set_environment_api_key(selected_llm, api_key)
210
  else:
211
+ return "", [{"role": "assistant", "content": "Please set the API key for the selected LLM."}]
212
 
213
  # Get the character's response using the provided function
214
  try:
215
  response_content = get_character_response(user_input, self.character_config, llm_model=selected_llm)
216
+
217
+ # Create new messages list with proper format
218
+ messages = chat_history + [
219
+ {"role": "user", "content": user_input},
220
+ {"role": "assistant", "content": response_content}
221
+ ]
222
+
223
+ return "", messages
224
+
225
  except Exception as e:
226
+ error_message = [{"role": "assistant", "content": f"Error during LLM processing: {e}"}]
227
+ return "", error_message
 
 
 
 
228
 
229
  def set_environment_api_key(self, llm, api_key):
230
  """Set the environment variable for the API key based on the LLM."""
 
236
  os.environ["OPENROUTER_API_KEY"] = api_key
237
 
238
  def save_conversation(self, chat_history):
239
+ """Save the conversation to a file in the /home/user/conversations directory."""
240
+ # Use the persistent storage path for Hugging Face Spaces
241
+ folder = os.path.join("/home/user/conversations")
242
+ if not os.path.exists(folder):
243
+ os.makedirs(folder, exist_ok=True)
244
+
245
+ timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
246
  character_name = self.character_config['character']['name'] if self.character_config else "AI"
247
+ safe_char_name = "".join(c for c in character_name if c.isalnum() or c in ('-', '_')).lower()
248
+ filename = f"chat_{timestamp}_{safe_char_name}.txt"
249
+ filepath = os.path.join(folder, filename)
250
+
251
+ try:
252
+ with open(filepath, 'w', encoding='utf-8') as f:
253
+ f.write(f"Conversation with {character_name}\n")
254
+ f.write(f"Timestamp: {timestamp}\n")
255
+ f.write("-" * 50 + "\n\n")
256
+ for message in chat_history:
257
+ role = message["role"]
258
+ content = message["content"]
259
+ if role == "user":
260
+ f.write(f"User: {content}\n")
261
+ else:
262
+ f.write(f"{character_name}: {content}\n")
263
+ f.write("\n")
264
+ return f"Conversation saved as {filename}"
265
+ except Exception as e:
266
+ return f"Error saving conversation: {e}"
267
 
268
  def new_conversation(self):
269
+ """Clear the chat history."""
270
  return [], []
271
 
272
def collect_data(self, user_question, k, selected_llm):
    """Ask the selected LLM for ``k`` answers to one question.

    Args:
        user_question: Question to put to the character.
        k: Number of responses to request; converted with ``int``.
        selected_llm: Key used to look up the API key in
            ``self.api_key_dict``; also passed to ``create_client``.

    Returns:
        ``(status, rows)``. On success ``status`` is ``""`` and ``rows`` is a
        list of ``[rank, response, "⬆️", "⬇️"]`` lists with 1-based string
        ranks in generation order. On any failure ``status`` describes the
        problem and ``rows`` is ``None``.
    """
    if not user_question:
        return "Please enter a question.", None
    if self.character_config is None:
        return "No character selected.", None

    api_key = self.api_key_dict.get(selected_llm)
    if not api_key:
        return "Please set the API key for the selected LLM.", None
    self.set_environment_api_key(selected_llm, api_key)

    try:
        # Prompt building and client creation are inside the try so that a
        # malformed character config or an unsupported model is reported in
        # the status box like any other generation failure.
        prompt = build_prompt(user_question, self.character_config)
        system_prompt = build_system_prompt(self.character_config['character']['name'])
        client, model = create_client(selected_llm)
        responses, _ = get_batch_responses_from_llm(
            msg=prompt,
            client=client,
            model=model,
            system_message=system_prompt,
            temperature=0.9,
            n_responses=int(k),
        )
        # One table row per response: rank, text, then the two clickable
        # cells used for reordering.
        responses_data = [
            [str(rank), resp, "⬆️", "⬇️"]
            for rank, resp in enumerate(responses, start=1)
        ]
    except Exception as e:
        return f"Error during LLM processing: {e}", None
    return "", responses_data
301
+
302
def save_data_collection(self, user_question, ranked_responses, folder="/home/user/dpo_training"):
    """Save ranked responses as pairwise preference examples in a JSONL file.

    Every pair of responses ``(i, j)`` with ``i`` above ``j`` in the table
    becomes one JSON line with the higher row as ``chosen`` and the lower
    row as ``rejected``.

    Args:
        user_question: The question the responses answer.
        ranked_responses: Table rows ordered best-first; each row holds the
            rank in column 0 and the response text in column 1. Rows with a
            blank response are ignored.
        folder: Directory to write into, created if missing.
            NOTE(review): the previous comment described the default as the
            persistent storage path on Hugging Face Spaces - confirm.

    Returns:
        ``"Rankings saved to <filename>"`` on success, a prompt to supply
        the missing input when there is nothing to save, or
        ``"Error saving rankings: <reason>"`` on failure. No file is created
        unless at least one preference pair exists.
    """
    import json
    from itertools import combinations

    if not user_question or not str(user_question).strip():
        return "Please enter a question."

    # Keep only rows that actually carry a response; blank or padded table
    # rows would otherwise break rank parsing or create empty examples.
    ranked = []
    rows = ranked_responses if ranked_responses is not None else []
    for position, row in enumerate(rows, start=1):
        if len(row) < 2:
            continue
        response = row[1]
        if response is None or not str(response).strip():
            continue
        try:
            rank = int(str(row[0]).strip())
        except ValueError:
            # An unreadable rank cell falls back to the row's position.
            rank = position
        ranked.append((rank, response))

    if len(ranked) < 2:
        return "Please generate and rank at least two responses before saving."

    try:
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        character_config = self.character_config or {}
        character_name = character_config.get('character', {}).get('name', "Unknown")
        safe_char_name = "".join(c for c in character_name if c.isalnum() or c in ('-', '_')).lower()
        jsonl_filename = f"dpo_{timestamp}_{safe_char_name}.jsonl"

        system_prompt = build_system_prompt(character_name) if self.character_config else ""
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_question},
        ]

        # Serialise everything before touching the disk so that a failure
        # cannot leave a truncated or empty file behind.
        lines = []
        for (chosen_rank, chosen), (rejected_rank, rejected) in combinations(ranked, 2):
            dpo_example = {
                "messages": messages,
                "chosen": chosen,
                "rejected": rejected,
                "prompt": user_question,
                "system_prompt": system_prompt,
                "character_name": character_name,
                "ranking_info": {
                    "chosen_rank": chosen_rank,
                    "rejected_rank": rejected_rank,
                },
                "metadata": {
                    "timestamp": timestamp,
                    "session_id": timestamp,
                },
            }
            lines.append(json.dumps(dpo_example, ensure_ascii=False) + '\n')

        os.makedirs(folder, exist_ok=True)
        with open(os.path.join(folder, jsonl_filename), 'w', encoding='utf-8') as f:
            f.write("".join(lines))
    except Exception as e:
        return f"Error saving rankings: {e}"
    return f"Rankings saved to {jsonl_filename}"
357
+
358
def move_row(self, data, evt: gr.SelectData):
    """Return a copy of ``data`` with the clicked row moved one slot.

    Args:
        data: Table rows as a list of lists; column 0 holds the rank.
        evt: Selection event whose ``index`` is read as ``(row, column)``.
            A click in column 2 moves the row up and a click in column 3
            moves it down.

    Returns:
        A new list of rows, renumbered ``"1"``, ``"2"``, ... in column 0.
        The input is returned unchanged when ``data`` or ``evt`` is empty.
        Clicks in other columns, at the table edge, or with an out-of-range
        row index only renumber. ``data`` itself is never modified.
    """
    if not data or not evt:
        return data

    row_idx, col_idx = evt.index[0], evt.index[1]
    # Copy each row so neither the caller's outer list nor its rows change.
    rows = [list(row) for row in data]

    if col_idx == 2:  # Up column
        target = row_idx - 1
    elif col_idx == 3:  # Down column
        target = row_idx + 1
    else:
        target = None

    # Swap only when both positions are real rows; this covers the table
    # edges and any stale or out-of-range index from the event.
    if target is not None and 0 <= row_idx < len(rows) and 0 <= target < len(rows):
        rows[row_idx], rows[target] = rows[target], rows[row_idx]

    # Ranks always mirror the current row order.
    for position, row in enumerate(rows, start=1):
        if row:
            row[0] = str(position)

    return rows
378
+
379
  # Instantiate the ChatApp
380
  app = ChatApp()
381
 
 
399
  character_dropdown = gr.Dropdown(
400
  label="Select Character", choices=["New Character"] + character_names, value=character_names[0] if character_names else "New Character"
401
  )
 
402
  # Character Details
403
  with gr.Accordion("Character Details", open=True):
404
  name_input = gr.Textbox(label="Name")
 
449
 
450
  with gr.Tab("Chat"):
451
  gr.Markdown("## Chat Interface")
452
+ chatbot = gr.Chatbot(type="messages")
453
  user_input = gr.Textbox(label="Your Message:", placeholder="Type your message here...")
454
  send_button = gr.Button("Send")
455
 
 
476
  save_status = gr.Textbox(label="Save Status", interactive=False)
477
  save_button.click(app.save_conversation, inputs=[chatbot], outputs=save_status)
478
 
479
+ with gr.Tab("Data Collection"):
480
+ gr.Markdown("""
481
+ ## Data Collection Interface
482
+ This interface helps collect multiple AI responses for the same question to evaluate response quality.
483
+
484
+ ### How to use:
485
+ 1. Enter your question
486
+ 2. Choose how many responses you want
487
+ 3. Generate responses
488
+ 4. Use ⬆️ and ⬇️ buttons to reorder responses (top = best)
489
+ 5. Save the rankings
490
+ """)
491
+
492
+ with gr.Row():
493
+ with gr.Column(scale=3):
494
+ data_question_input = gr.Textbox(
495
+ label="Question for the AI Character",
496
+ placeholder="Type your question here...",
497
+ lines=3
498
+ )
499
+ with gr.Column(scale=1):
500
+ k_input = gr.Slider(
501
+ minimum=2,
502
+ maximum=10,
503
+ value=5,
504
+ step=1,
505
+ label="Number of Responses to Generate"
506
+ )
507
+ llm_dropdown_data = gr.Dropdown(
508
+ label="Select Language Model",
509
+ choices=app.available_llms,
510
+ value=app.available_llms[0] if app.available_llms else None
511
+ )
512
+
513
+ generate_button = gr.Button("🔄 Generate Responses", variant="primary")
514
+
515
+ collection_status = gr.Textbox(
516
+ label="Generation Status",
517
+ interactive=False,
518
+ visible=False
519
+ )
520
+
521
+ # New interface for ranking responses
522
+ responses_df = gr.Dataframe(
523
+ headers=["Rank", "Response", "Up", "Down"],
524
+ datatype=["str", "str", "str", "str"],
525
+ col_count=(4, "fixed"),
526
+ interactive=True,
527
+ wrap=True,
528
+ row_count=10,
529
+ label="Click ⬆️ or ⬇️ to reorder responses (top = best)",
530
+ type="array"
531
+ )
532
+
533
def move_row(data, evt: gr.SelectData):
    """Reorder the ranking table after a click on an Up or Down cell.

    This is the callback registered on the responses table. The reordering
    rules live in ``ChatApp.move_row``; delegating to the shared ``app``
    instance keeps a single implementation instead of two copies that can
    drift apart.

    Args:
        data: Current table rows as a list of lists.
        evt: Selection event describing the clicked cell.

    Returns:
        The table rows after the move, as returned by ``app.move_row``.
    """
    return app.move_row(data, evt)
553
+
554
+ # Add click handler for both Up and Down columns
555
+ responses_df.select(
556
+ move_row,
557
+ inputs=[responses_df],
558
+ outputs=[responses_df]
559
+ )
560
+
561
+ submit_ranking_button = gr.Button("💾 Save Rankings", variant="secondary")
562
+ data_save_status = gr.Textbox(
563
+ label="Save Status",
564
+ interactive=False,
565
+ visible=False
566
+ )
567
+
568
+ # Show status messages when they contain content
569
+ collection_status.change(
570
+ lambda x: gr.update(visible=bool(x.strip())),
571
+ inputs=[collection_status],
572
+ outputs=[collection_status]
573
+ )
574
+
575
+ data_save_status.change(
576
+ lambda x: gr.update(visible=bool(x.strip())),
577
+ inputs=[data_save_status],
578
+ outputs=[data_save_status]
579
+ )
580
+
581
+ generate_button.click(
582
+ app.collect_data,
583
+ inputs=[data_question_input, k_input, llm_dropdown_data],
584
+ outputs=[collection_status, responses_df]
585
+ )
586
 
587
+ submit_ranking_button.click(
588
+ app.save_data_collection,
589
+ inputs=[data_question_input, responses_df],
590
+ outputs=[data_save_status]
591
+ )
592
 
593
  # Initialize UI components with default character data
594
  if app.character_config: