kulia-moon commited on
Commit
2f2a5f4
·
verified ·
1 Parent(s): dedde71

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +489 -131
app.py CHANGED
@@ -10,11 +10,14 @@ import datetime
10
 
11
  # --- Configuration for the Gradio app's internal logic ---
12
  # Local cache directory (data will be accumulated here first)
13
- OUTPUT_DIR = "generated"
14
- DATA_FILE = os.path.join(OUTPUT_DIR, f"conversations.jsonl")
 
 
15
 
16
  # Hugging Face Dataset repository to push to
17
- HF_DATASET_REPO_ID = "kulia-moon/LimeStory-1.0" # This is the target dataset
 
18
 
19
  # Configure OpenAI client for Pollinations.ai
20
  client = openai.OpenAI(
@@ -23,7 +26,6 @@ client = openai.OpenAI(
23
  )
24
 
25
  # Define ALL available models from https://text.pollinations.ai/models
26
- # This list is more comprehensive. Speeds are approximate relative to each other.
27
  AVAILABLE_MODELS = {
28
  "openai": {"description": "GPT-4o mini (generally fast, good all-rounder)", "speed": "Fast"},
29
  "gemini": {"description": "Gemini 2.0 Flash (designed for speed)", "speed": "Very Fast"},
@@ -83,8 +85,8 @@ DEFAULT_INITIAL_PROMPTS = [
83
  def chat(system, prompt, selected_model_name, seed=None, num_exchanges=5):
84
  if seed is None:
85
  seed = random.randint(0, 1000000)
86
- random.seed(seed)
87
-
88
  conversation = [
89
  {"from": "system", "value": system},
90
  {"from": "human", "value": prompt}
@@ -93,40 +95,54 @@ def chat(system, prompt, selected_model_name, seed=None, num_exchanges=5):
93
  {"role": "system", "content": system},
94
  {"role": "user", "content": prompt}
95
  ]
96
-
97
  try:
98
- response = client.chat.completions.create(
 
99
  model=selected_model_name,
100
  messages=messages,
101
  max_tokens=150,
102
  temperature=0.9,
103
- seed=seed
104
  )
105
- gpt_response = response.choices[0].message.content.strip()
106
-
107
- conversation.append({"from": "gpt", "value": gpt_response})
108
-
109
- for i in range(num_exchanges - 1): # Loop for subsequent exchanges
 
 
 
110
  follow_up_prompt_messages = [
111
- {"role": "system", "content": f"You are a helpful and engaging assistant. Based on the last response, generate a polite, open-ended, and cute follow-up question or statement to keep a friendly conversation going. Make it relevant to the last message and consistent with a 'cute' and positive tone."},
112
- {"role": "assistant", "content": gpt_response},
113
- {"role": "user", "content": "Generate a cute and friendly follow-up."}
114
  ]
115
-
116
- follow_up_response = client.chat.completions.create(
117
- model=selected_model_name,
118
  messages=follow_up_prompt_messages,
119
  max_tokens=70,
120
  temperature=0.8,
121
- seed=seed + 1000 + i # Vary seed for follow-ups
122
  )
123
- follow_up = follow_up_response.choices[0].message.content.strip()
124
-
125
- conversation.append({"from": "human", "value": follow_up})
126
-
127
- messages.append({"role": "assistant", "content": gpt_response})
128
- messages.append({"role": "user", "content": follow_up})
129
- gpt_response = follow_up_response.choices[0].message.content.strip() # Update gpt_response for next turn's context
 
 
 
 
 
 
 
 
 
 
130
 
131
  return conversation
132
  except Exception as e:
@@ -136,141 +152,293 @@ def chat(system, prompt, selected_model_name, seed=None, num_exchanges=5):
136
  return conversation
137
 
138
  # --- Hugging Face Push Function (for Dataset) ---
139
- # This function will attempt to use the HF_TOKEN environment variable automatically.
140
- def push_to_huggingface_dataset():
141
  api = HfApi()
142
-
143
- # Check if HF_TOKEN is available (it should be set as a Space Secret)
144
  hf_token = os.environ.get("HF_TOKEN")
145
  if not hf_token:
146
  log_message = "Hugging Face token (HF_TOKEN environment variable) not found. Cannot push to Hub."
147
  print(log_message)
148
  return log_message
149
 
 
 
 
 
 
150
  try:
151
- # Use a temporary file for upload to ensure it's fresh
152
- temp_data_file = "temp_conversations_to_upload.jsonl"
153
-
154
- # Read all conversations from DATA_FILE
155
- all_conversations = []
156
- if os.path.exists(DATA_FILE):
157
- with open(DATA_FILE, "r") as f:
158
- for line in f:
159
- all_conversations.append(json.loads(line.strip()))
160
-
161
- if not all_conversations:
162
- log_message = "No conversations to push to the dataset."
163
- print(log_message)
164
- return log_message
165
-
166
- # Write data to a temporary file
167
- with open(temp_data_file, "w") as f:
168
- for conv in all_conversations:
169
- f.write(json.dumps(conv) + "\n")
170
-
171
- # Push the temporary file to the dataset repo
172
  current_time_str = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
173
- commit_message = f"Update conversations.jsonl from Gradio app on {current_time_str} (An Nhơn, Binh Dinh, Vietnam)"
174
  api.upload_file(
175
- path_or_fileobj=DATA_FILE ,
176
- path_in_repo=DATA_FILE, # The target file name within the dataset repo
177
  repo_id=HF_DATASET_REPO_ID,
178
- repo_type="dataset", # Specify repo_type="dataset"
179
  commit_message=commit_message,
180
- token=hf_token # Use the token from environment variable
181
  )
182
- # Clean up temporary file
183
- os.remove(temp_data_file)
184
-
185
- log_message = f"Successfully pushed updated conversations.jsonl to dataset {HF_DATASET_REPO_ID}"
186
  print(log_message)
187
  return log_message
188
  except Exception as e:
189
- log_message = f"Error pushing to Hugging Face dataset {HF_DATASET_REPO_ID}: {e}"
190
  print(log_message)
191
- if os.path.exists(temp_data_file):
192
- os.remove(temp_data_file) # Clean up temp file even on error
193
  return log_message
194
 
195
- # --- Gradio Interface Logic ---
196
-
197
- def generate_and_display_conversations(num_conversations_input, custom_prompts_input, custom_system_prompt_input):
198
- """
199
- Function to be called by Gradio to generate and return conversations,
200
- and then automatically push to the dataset.
201
- """
202
  num_conversations = int(num_conversations_input)
203
  if num_conversations <= 0:
204
  return "Please enter a number of conversations greater than zero.", ""
205
 
206
  os.makedirs(OUTPUT_DIR, exist_ok=True)
207
-
 
208
  existing_conversations = []
209
  if os.path.exists(DATA_FILE):
210
  with open(DATA_FILE, "r") as f:
211
  for line in f:
212
- existing_conversations.append(json.loads(line.strip()))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
  current_prompts = DEFAULT_INITIAL_PROMPTS
215
  if custom_prompts_input:
216
- # Split custom prompts by comma and clean up whitespace
217
  parsed_custom_prompts = [p.strip() for p in custom_prompts_input.split(',') if p.strip()]
218
  if parsed_custom_prompts:
219
  current_prompts = parsed_custom_prompts
220
 
221
- new_conversations = []
222
- model_names_to_use = list(AVAILABLE_MODELS.keys())
223
-
224
- generation_log = []
225
- current_time_loc = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + " (An Nhơn, Binh Dinh, Vietnam)"
226
- generation_log.append(f"Starting conversation generation at {current_time_loc}")
227
- generation_log.append(f"Generating {num_conversations} conversations.")
228
- generation_log.append(f"Models to be used: {', '.join(model_names_to_use)}")
229
 
230
  for i in tqdm(range(num_conversations), desc="Generating conversations"):
231
  seed = random.randint(0, 1000000)
232
-
233
- # Select system prompt: user's custom prompt if provided, else random from defaults
234
  if custom_system_prompt_input:
235
  system = custom_system_prompt_input.strip()
236
  else:
237
  system = random.choice(role_play_prompts)
238
-
239
  random_name = random.choice(DIVERSE_NAMES)
240
  prompt_template = random.choice(current_prompts)
241
- # Ensure that if [NAME] is not in the template, it's not a problem
242
  prompt = prompt_template.replace("[NAME]", random_name)
243
 
244
- selected_model_name = random.choice(model_names_to_use) # Randomly pick from ALL models
245
-
246
- generation_log.append(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] Generating conv {i+1}/{num_conversations} with '{selected_model_name}' (System: '{system[:50]}...')") # Log first 50 chars of system prompt
247
- conversation = chat(system, prompt, selected_model_name, seed=seed, num_exchanges=5)
 
 
 
248
 
249
- if len(conversation) > 1 and not any(d.get("from") == "error" for d in conversation):
250
- new_conversations.append({"model_used": selected_model_name, "conversations": conversation})
251
- generation_log.append(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] Successfully generated conv {i+1}/{num_conversations}.")
 
 
 
 
 
 
 
 
 
 
 
252
  else:
253
- generation_log.append(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] Skipping conv {i+1}/{num_conversations} due to error or no content.")
254
  if conversation and conversation[-1].get("from") == "error":
255
  generation_log.append(f" Error details: {conversation[-1]['value']}")
256
 
257
- all_conversations = existing_conversations + new_conversations
258
-
259
- # Save to JSONL in the /generated folder
260
  with open(DATA_FILE, "w") as f:
261
  for conv in all_conversations:
262
  f.write(json.dumps(conv) + "\n")
263
-
264
- generation_log.append(f"Saved {len(new_conversations)} new conversations to {DATA_FILE} (total: {len(all_conversations)}).")
265
- generation_log.append("Attempting to push to Hugging Face Dataset...")
266
 
267
- # --- Auto-push to Hugging Face Dataset ---
268
- push_status = push_to_huggingface_dataset()
 
 
 
 
 
269
  generation_log.append(push_status)
270
  generation_log.append(f"Process complete at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} (An Nhơn, Binh Dinh, Vietnam)")
271
 
272
  return json.dumps(all_conversations, indent=2), "\n".join(generation_log)
273
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  # Gradio Interface setup
275
  with gr.Blocks() as demo:
276
  gr.Markdown("# Cute AI Conversation Generator 🐾")
@@ -279,45 +447,235 @@ with gr.Blocks() as demo:
279
  f"Generated data is saved and pushed to the Hugging Face dataset `{HF_DATASET_REPO_ID}`."
280
  )
281
 
282
- with gr.Row():
283
- num_conversations_input = gr.Slider(minimum=1, maximum=20, value=3, step=1, label="Number of Conversations to Generate", info="More conversations take longer and might hit API limits.")
284
-
285
- custom_system_prompt_input = gr.Textbox(
286
- label="Custom System Prompt (optional)",
287
- placeholder="e.g., You are a helpful and kind AI assistant.",
288
- info="Define the AI's role or personality. If left empty, a random cute role-play prompt will be used.",
289
- lines=3
290
- )
291
 
292
- custom_prompts_input = gr.Textbox(
293
- label="Custom Initial Prompts (optional)",
294
- placeholder="e.g., What's your favorite color?, Tell me a joke, What makes you happy?",
295
- info="Enter multiple prompts separated by commas. If left empty, default prompts will be used. Make sure to include '[NAME]' if you want a name inserted.",
296
- lines=3
297
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
 
299
- generate_button = gr.Button("Generate & Push Conversations")
300
 
301
- output_conversations = gr.JSON(label="Generated Conversations (Content of conversations.jsonl)")
302
- output_log = gr.Textbox(label="Process Log", interactive=False, lines=10, max_lines=20) # Increased max_lines for more log visibility
303
-
304
- generate_button.click(
305
- fn=generate_and_display_conversations,
306
- inputs=[num_conversations_input, custom_prompts_input, custom_system_prompt_input],
307
- outputs=[output_conversations, output_log],
308
- show_progress=True
309
- )
310
-
311
  gr.Markdown("---")
312
  gr.Markdown(
313
- "**Note on Push to Hub:** This Space is configured to automatically push generated data to "
 
314
  f"`{HF_DATASET_REPO_ID}` using a Hugging Face token securely stored as a Space Secret (`HF_TOKEN`). "
315
  "User tokens are not required."
316
  )
317
  current_datetime_vietnam = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=7))).strftime('%Y-%m-%d %H:%M:%S %Z%z')
318
- gr.Markdown(f"Current server time: {current_datetime_vietnam} (Vietnam)")
319
 
320
 
321
  # Launch the Gradio app
322
  if __name__ == "__main__":
 
 
 
 
323
  demo.launch(debug=True, share=False)
 
10
 
11
  # --- Configuration for the Gradio app's internal logic ---
12
  # Local cache directory (data will be accumulated here first)
13
+ OUTPUT_DIR = "generated"
14
+ DATA_FILE = os.path.join(OUTPUT_DIR, "conversations.jsonl")
15
+ COMMUNITY_PROMPTS_FILE = os.path.join(OUTPUT_DIR, "community_prompts.jsonl")
16
+ COMMIT_TEMPLATES_FILE = os.path.join(OUTPUT_DIR, "commits.json") # New: Commit templates file
17
 
18
  # Hugging Face Dataset repository to push to
19
+ HF_DATASET_REPO_ID = "kulia-moon/LimeStory-1.0" # This is the target dataset for conversations
20
+ HF_COMMUNITY_PROMPT_FILE_IN_REPO = "community_prompts.jsonl" # Target file name within the dataset repo for community prompts
21
 
22
  # Configure OpenAI client for Pollinations.ai
23
  client = openai.OpenAI(
 
26
  )
27
 
28
  # Define ALL available models from https://text.pollinations.ai/models
 
29
  AVAILABLE_MODELS = {
30
  "openai": {"description": "GPT-4o mini (generally fast, good all-rounder)", "speed": "Fast"},
31
  "gemini": {"description": "Gemini 2.0 Flash (designed for speed)", "speed": "Very Fast"},
 
85
  def chat(system, prompt, selected_model_name, seed=None, num_exchanges=5):
86
  if seed is None:
87
  seed = random.randint(0, 1000000)
88
+ random.seed(seed) # Set for reproducibility for the whole conversation generation
89
+
90
  conversation = [
91
  {"from": "system", "value": system},
92
  {"from": "human", "value": prompt}
 
95
  {"role": "system", "content": system},
96
  {"role": "user", "content": prompt}
97
  ]
98
+
99
  try:
100
+ # Initial AI response
101
+ ai_response_obj = client.chat.completions.create(
102
  model=selected_model_name,
103
  messages=messages,
104
  max_tokens=150,
105
  temperature=0.9,
106
+ seed=seed # Use base seed for first AI response
107
  )
108
+ ai_response_content = ai_response_obj.choices[0].message.content.strip()
109
+
110
+ conversation.append({"from": "gpt", "value": ai_response_content})
111
+ messages.append({"role": "assistant", "content": ai_response_content})
112
+
113
+ # Loop for subsequent exchanges
114
+ for i in range(num_exchanges - 1): # We already did 1 exchange (human initial -> AI response)
115
+ # AI generates the *human's* follow-up question/statement
116
  follow_up_prompt_messages = [
117
+ {"role": "system", "content": "You are a helpful and engaging assistant. Based on the last assistant response, generate a polite, open-ended, and cute follow-up question or statement from a user to keep a friendly conversation going. Make it relevant to the last message and consistent with a 'cute' and positive tone."},
118
+ {"role": "assistant", "content": ai_response_content}, # Use the last AI response as context
119
+ {"role": "user", "content": "Generate a cute and friendly follow-up question/statement (max 70 words)."}
120
  ]
121
+
122
+ human_follow_up_obj = client.chat.completions.create(
123
+ model=selected_model_name, # Can use the same model
124
  messages=follow_up_prompt_messages,
125
  max_tokens=70,
126
  temperature=0.8,
127
+ seed=seed + 1000 + i # Vary seed for human follow-up generation
128
  )
129
+ human_follow_up_content = human_follow_up_obj.choices[0].message.content.strip()
130
+
131
+ conversation.append({"from": "human", "value": human_follow_up_content})
132
+ messages.append({"role": "user", "content": human_follow_up_content})
133
+
134
+ # AI generates its next response based on the human follow-up
135
+ ai_response_obj = client.chat.completions.create(
136
+ model=selected_model_name,
137
+ messages=messages, # messages now includes the human follow-up
138
+ max_tokens=150,
139
+ temperature=0.9,
140
+ seed=seed + 2000 + i # Vary seed for next AI response
141
+ )
142
+ ai_response_content = ai_response_obj.choices[0].message.content.strip()
143
+
144
+ conversation.append({"from": "gpt", "value": ai_response_content})
145
+ messages.append({"role": "assistant", "content": ai_response_content})
146
 
147
  return conversation
148
  except Exception as e:
 
152
  return conversation
153
 
154
  # --- Hugging Face Push Function (for Dataset) ---
155
+ def push_file_to_huggingface_dataset(file_path, path_in_repo, commit_message_prefix):
 
156
  api = HfApi()
157
+
 
158
  hf_token = os.environ.get("HF_TOKEN")
159
  if not hf_token:
160
  log_message = "Hugging Face token (HF_TOKEN environment variable) not found. Cannot push to Hub."
161
  print(log_message)
162
  return log_message
163
 
164
+ if not os.path.exists(file_path) or os.stat(file_path).st_size == 0:
165
+ log_message = f"No data in {file_path} to push to the dataset."
166
+ print(log_message)
167
+ return log_message
168
+
169
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  current_time_str = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
171
+ commit_message = f"{commit_message_prefix} on {current_time_str} (An Nhơn, Binh Dinh, Vietnam)"
172
  api.upload_file(
173
+ path_or_fileobj=file_path,
174
+ path_in_repo=path_in_repo,
175
  repo_id=HF_DATASET_REPO_ID,
176
+ repo_type="dataset",
177
  commit_message=commit_message,
178
+ token=hf_token
179
  )
180
+ log_message = f"Successfully pushed {path_in_repo} to dataset {HF_DATASET_REPO_ID}"
 
 
 
181
  print(log_message)
182
  return log_message
183
  except Exception as e:
184
+ log_message = f"Error pushing {path_in_repo} to Hugging Face dataset {HF_DATASET_REPO_ID}: {e}"
185
  print(log_message)
 
 
186
  return log_message
187
 
188
+ # --- Main Generation and Push Function ---
189
+ def generate_and_display_conversations(num_conversations_input, custom_prompts_input, custom_system_prompt_input,
190
+ commit_subject, commit_body, selected_model_name_input): # New: selected_model_name_input
 
 
 
 
191
  num_conversations = int(num_conversations_input)
192
  if num_conversations <= 0:
193
  return "Please enter a number of conversations greater than zero.", ""
194
 
195
  os.makedirs(OUTPUT_DIR, exist_ok=True)
196
+
197
+ # --- Load and Clean Existing Conversations ---
198
  existing_conversations = []
199
  if os.path.exists(DATA_FILE):
200
  with open(DATA_FILE, "r") as f:
201
  for line in f:
202
+ try:
203
+ existing_conversations.append(json.loads(line.strip()))
204
+ except json.JSONDecodeError as e:
205
+ print(f"Skipping malformed JSON line in {DATA_FILE}: {line.strip()} - {e}")
206
+
207
+ # Deduplicate existing conversations
208
+ seen_conversations = set()
209
+ cleaned_existing_conversations = []
210
+ for conv_entry in existing_conversations:
211
+ # Use a string representation of the whole entry for deduplication
212
+ conv_str = json.dumps(conv_entry, sort_keys=True)
213
+ if conv_str not in seen_conversations:
214
+ cleaned_existing_conversations.append(conv_entry)
215
+ seen_conversations.add(conv_str)
216
+
217
+ # Validate and filter existing conversations for completeness (expected length)
218
+ expected_msg_len = lambda n_exchanges: 1 + 1 + n_exchanges + (n_exchanges - 1) # System + initial human + AI turns + human follow-ups
219
+
220
+ validated_existing_conversations = []
221
+ initial_cleaned_count = len(cleaned_existing_conversations)
222
+ for conv_entry in cleaned_existing_conversations:
223
+ conv_list = conv_entry.get("conversations", [])
224
+ # Assume num_exchanges was 5 for old conversations if not stored
225
+ # Or more robustly, infer from length.
226
+ # Given the fixed num_exchanges=5 for generation, we can check for this.
227
+ if len(conv_list) == expected_msg_len(5):
228
+ validated_existing_conversations.append(conv_entry)
229
+ else:
230
+ print(f"Skipping incomplete/malformed existing conversation (length {len(conv_list)} != {expected_msg_len(5)}): {conv_entry}")
231
+
232
+ all_conversations = list(validated_existing_conversations) # Start with clean existing ones
233
+
234
+ generation_log = []
235
+ current_time_loc = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + " (An Nhơn, Binh Dinh, Vietnam)"
236
+ generation_log.append(f"Starting conversation generation at {current_time_loc}")
237
+ generation_log.append(f"Loaded and cleaned {len(validated_existing_conversations)} existing conversations (initially {initial_cleaned_count} before validation).")
238
+ generation_log.append(f"Generating {num_conversations} *new* conversations.")
239
+
240
+ model_names_to_use = list(AVAILABLE_MODELS.keys())
241
+ if selected_model_name_input and selected_model_name_input in model_names_to_use:
242
+ # If a specific model is selected, only use that one
243
+ model_selection_info = f"Specific model selected: '{selected_model_name_input}'"
244
+ else:
245
+ # If no specific model or invalid model, pick a random one
246
+ model_selection_info = f"No specific model selected or invalid, picking randomly from: {', '.join(model_names_to_use)}"
247
+ generation_log.append(model_selection_info)
248
+
249
 
250
  current_prompts = DEFAULT_INITIAL_PROMPTS
251
  if custom_prompts_input:
 
252
  parsed_custom_prompts = [p.strip() for p in custom_prompts_input.split(',') if p.strip()]
253
  if parsed_custom_prompts:
254
  current_prompts = parsed_custom_prompts
255
 
256
+ new_conversations_generated = []
257
+ expected_conversation_length = expected_msg_len(5) # Always 5 exchanges for new generations
 
 
 
 
 
 
258
 
259
  for i in tqdm(range(num_conversations), desc="Generating conversations"):
260
  seed = random.randint(0, 1000000)
261
+
 
262
  if custom_system_prompt_input:
263
  system = custom_system_prompt_input.strip()
264
  else:
265
  system = random.choice(role_play_prompts)
266
+
267
  random_name = random.choice(DIVERSE_NAMES)
268
  prompt_template = random.choice(current_prompts)
 
269
  prompt = prompt_template.replace("[NAME]", random_name)
270
 
271
+ # Determine the model to use for this specific conversation
272
+ if selected_model_name_input and selected_model_name_input in model_names_to_use:
273
+ selected_model_for_this_conv = selected_model_name_input
274
+ else:
275
+ selected_model_for_this_conv = random.choice(model_names_to_use)
276
+
277
+ generation_log.append(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] Generating conv {i+1}/{num_conversations} with '{selected_model_for_this_conv}' (System: '{system[:50]}...')")
278
 
279
+ conversation = chat(system, prompt, selected_model_for_this_conv, seed=seed, num_exchanges=5)
280
+
281
+ if len(conversation) == expected_conversation_length and not any(d.get("from") == "error" for d in conversation):
282
+ new_conv_entry = {"model_used": selected_model_for_this_conv, "conversations": conversation}
283
+ # Add to all_conversations and new_conversations_generated only if not a duplicate of what's already *in memory*
284
+ # This handles duplicates from current batch or newly generated identical to existing
285
+ new_conv_str = json.dumps(new_conv_entry, sort_keys=True)
286
+ if new_conv_str not in seen_conversations:
287
+ all_conversations.append(new_conv_entry)
288
+ new_conversations_generated.append(new_conv_entry)
289
+ seen_conversations.add(new_conv_str) # Mark as seen
290
+ generation_log.append(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] Successfully generated and added conv {i+1}/{num_conversations}.")
291
+ else:
292
+ generation_log.append(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] Skipped conv {i+1}/{num_conversations} as it's a duplicate.")
293
  else:
294
+ generation_log.append(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] Skipping conv {i+1}/{num_conversations} due to error or incorrect length ({len(conversation)} messages, expected {expected_conversation_length}).")
295
  if conversation and conversation[-1].get("from") == "error":
296
  generation_log.append(f" Error details: {conversation[-1]['value']}")
297
 
298
+ # Save all (cleaned existing + newly generated unique) conversations to JSONL
 
 
299
  with open(DATA_FILE, "w") as f:
300
  for conv in all_conversations:
301
  f.write(json.dumps(conv) + "\n")
 
 
 
302
 
303
+ generation_log.append(f"Saved {len(new_conversations_generated)} *new unique* conversations to {DATA_FILE} (total unique and validated: {len(all_conversations)}).")
304
+ generation_log.append("Attempting to push main conversations file to Hugging Face Dataset...")
305
+
306
+ # --- Auto-push main conversations to Hugging Face Dataset ---
307
+ # Use the custom commit message
308
+ commit_message = f"{commit_subject.strip()}\n\n{commit_body.strip()}" if commit_body.strip() else commit_subject.strip()
309
+ push_status = push_file_to_huggingface_dataset(DATA_FILE, "conversations.jsonl", commit_message)
310
  generation_log.append(push_status)
311
  generation_log.append(f"Process complete at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} (An Nhơn, Binh Dinh, Vietnam)")
312
 
313
  return json.dumps(all_conversations, indent=2), "\n".join(generation_log)
314
 
315
+ # --- Community Prompts Functions ---
316
+ def load_community_prompts():
317
+ prompts = []
318
+ if os.path.exists(COMMUNITY_PROMPTS_FILE):
319
+ with open(COMMUNITY_PROMPTS_FILE, "r") as f:
320
+ for line in f:
321
+ try:
322
+ prompts.append(json.loads(line.strip()))
323
+ except json.JSONDecodeError:
324
+ continue # Skip malformed lines
325
+ return prompts
326
+
327
+ def save_community_prompt(system_prompt, initial_prompt):
328
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
329
+
330
+ # Load existing prompts to deduplicate and append
331
+ existing_prompts = load_community_prompts()
332
+ seen_prompts_for_dedup = set()
333
+ cleaned_existing_prompts = []
334
+ for p in existing_prompts:
335
+ p_str = json.dumps(p, sort_keys=True)
336
+ if p_str not in seen_prompts_for_dedup:
337
+ cleaned_existing_prompts.append(p)
338
+ seen_prompts_for_dedup.add(p_str)
339
+
340
+ new_prompt_entry = {
341
+ "system_prompt": system_prompt.strip(),
342
+ "initial_prompt": initial_prompt.strip(),
343
+ "timestamp": datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S %Z%z')
344
+ }
345
+ new_prompt_str = json.dumps(new_prompt_entry, sort_keys=True)
346
+
347
+ log_message = []
348
+ if not system_prompt.strip() or not initial_prompt.strip():
349
+ log_message.append("System prompt and Initial prompt cannot be empty.")
350
+ elif new_prompt_str in seen_prompts_for_dedup:
351
+ log_message.append("This exact prompt pair already exists in the community list.")
352
+ else:
353
+ cleaned_existing_prompts.append(new_prompt_entry)
354
+ with open(COMMUNITY_PROMPTS_FILE, "w") as f:
355
+ for p in cleaned_existing_prompts:
356
+ f.write(json.dumps(p) + "\n")
357
+ log_message.append("Prompt submitted successfully!")
358
+
359
+ # Immediately attempt to push the updated community prompts file
360
+ push_status = push_file_to_huggingface_dataset(
361
+ COMMUNITY_PROMPTS_FILE,
362
+ HF_COMMUNITY_PROMPT_FILE_IN_REPO,
363
+ "Update community_prompts.jsonl from Gradio app"
364
+ )
365
+ log_message.append(push_status)
366
+
367
+ return "\n".join(log_message), json.dumps(cleaned_existing_prompts, indent=2)
368
+
369
+ # Function to refresh community prompts display
370
+ def refresh_community_prompts_display():
371
+ prompts = load_community_prompts()
372
+ return json.dumps(prompts, indent=2)
373
+
374
+ # --- Commit Templates Functions ---
375
+ def load_commit_templates():
376
+ if not os.path.exists(COMMIT_TEMPLATES_FILE):
377
+ # Create default templates if file doesn't exist
378
+ default_templates = [
379
+ {"name": "feat: New Feature", "subject": "feat: ", "body": ""},
380
+ {"name": "fix: Bug Fix", "subject": "fix: ", "body": "Fixes #[issue_number]"},
381
+ {"name": "docs: Documentation", "subject": "docs: ", "body": ""},
382
+ {"name": "chore: Maintenance", "subject": "chore: ", "body": ""},
383
+ {"name": "style: Formatting", "subject": "style: ", "body": ""},
384
+ {"name": "refactor: Code Refactor", "subject": "refactor: ", "body": ""},
385
+ {"name": "perf: Performance Improvement", "subject": "perf: ", "body": ""},
386
+ {"name": "test: Test Update", "subject": "test: ", "body": ""},
387
+ {"name": "Custom Empty", "subject": "", "body": ""}
388
+ ]
389
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
390
+ with open(COMMIT_TEMPLATES_FILE, "w") as f:
391
+ json.dump(default_templates, f, indent=2)
392
+ return default_templates
393
+
394
+ with open(COMMIT_TEMPLATES_FILE, "r") as f:
395
+ try:
396
+ return json.load(f)
397
+ except json.JSONDecodeError:
398
+ return [] # Return empty list if file is malformed
399
+
400
+ def get_template_choices():
401
+ templates = load_commit_templates()
402
+ return [t["name"] for t in templates]
403
+
404
+ def update_commit_fields(selected_template_name):
405
+ templates = load_commit_templates()
406
+ for template in templates:
407
+ if template["name"] == selected_template_name:
408
+ return template["subject"], template["body"]
409
+ return "", "" # Fallback if not found
410
+
411
+ def save_custom_commit_template(template_name, subject, body):
412
+ templates = load_commit_templates()
413
+
414
+ if not template_name.strip():
415
+ return "Template name cannot be empty!", gr.Dropdown.update(choices=get_template_choices()), gr.JSON.update(value=templates)
416
+
417
+ # Check for existing template with the same name
418
+ found = False
419
+ for template in templates:
420
+ if template["name"] == template_name.strip():
421
+ template["subject"] = subject.strip()
422
+ template["body"] = body.strip()
423
+ found = True
424
+ break
425
+
426
+ if not found:
427
+ templates.append({
428
+ "name": template_name.strip(),
429
+ "subject": subject.strip(),
430
+ "body": body.strip()
431
+ })
432
+
433
+ with open(COMMIT_TEMPLATES_FILE, "w") as f:
434
+ json.dump(templates, f, indent=2)
435
+
436
+ return f"Template '{template_name.strip()}' saved successfully!", gr.Dropdown.update(choices=get_template_choices()), gr.JSON.update(value=templates)
437
+
438
+ def refresh_commit_display():
439
+ templates = load_commit_templates()
440
+ return gr.Dropdown.update(choices=get_template_choices()), json.dumps(templates, indent=2)
441
+
442
# Gradio Interface setup
with gr.Blocks() as demo:
    gr.Markdown("# Cute AI Conversation Generator 🐾")
    # NOTE(review): the opening line(s) of this Markdown call were not visible
    # in the reviewed diff; only the f-string body and closing paren were shown.
    gr.Markdown(
        f"Generated data is saved and pushed to the Hugging Face dataset `{HF_DATASET_REPO_ID}`."
    )

    with gr.Tabs():
        with gr.Tab("Generate Conversations"):
            with gr.Row():
                # How many conversations a single button press will generate and push.
                num_conversations_input = gr.Slider(minimum=1, maximum=20, value=3, step=1, label="Number of Conversations to Generate", info="More conversations take longer and might hit API limits.")

            gr.Markdown("### Model Selection")
            # NOTE(review): these "name (description, Speed: ...)" labels are built
            # but never used below -- the dropdown uses plain model names instead.
            model_choices_with_descriptions = [
                f"{name} ({info['description']}, Speed: {info['speed']})"
                for name, info in AVAILABLE_MODELS.items()
            ]
            model_selector_dropdown = gr.Dropdown(
                label="Select Model (or leave empty for random)",
                choices=list(AVAILABLE_MODELS.keys()),  # The actual values passed will be model names
                value=None,  # Default to no selection, implying random
                interactive=True,
                info="Choose a specific model or let the app pick one randomly for each conversation."
            )
            # Add a Textbox for model description based on selection
            # (read-only; populated by the dropdown's change handler below).
            model_description_output = gr.Textbox(
                label="Selected Model Info",
                interactive=False,
                lines=2
            )
473
def get_model_info(model_name):
    """Return a short human-readable summary for the selected model.

    Falls back to a "random model" explanation when nothing is selected or
    the name is not a known model.
    """
    # Short-circuit on empty/None so AVAILABLE_MODELS is never touched for
    # a falsy selection (mirrors the dropdown's "no selection" state).
    known = bool(model_name) and model_name in AVAILABLE_MODELS
    if not known:
        return "No model selected, or model not found. A random model will be chosen per conversation."
    details = AVAILABLE_MODELS[model_name]
    return f"Description: {details['description']}\nSpeed: {details['speed']}"
478
+
479
+ model_selector_dropdown.change(
480
+ fn=get_model_info,
481
+ inputs=model_selector_dropdown,
482
+ outputs=model_description_output
483
+ )
484
+
485
+
486
+ custom_system_prompt_input = gr.Textbox(
487
+ label="Custom System Prompt (optional)",
488
+ placeholder="e.g., You are a helpful and kind AI assistant.",
489
+ info="Define the AI's role or personality. If left empty, a random cute role-play prompt will be used.",
490
+ lines=3
491
+ )
492
+
493
+ custom_prompts_input = gr.Textbox(
494
+ label="Custom Initial Prompts (optional)",
495
+ placeholder="e.g., What's your favorite color?, Tell me a joke, What makes you happy?",
496
+ info="Enter multiple prompts separated by commas. If left empty, default prompts will be used. Make sure to include '[NAME]' if you want a name inserted.",
497
+ lines=3
498
+ )
499
+
500
+ gr.Markdown("### Hugging Face Commit Message")
501
+ with gr.Row():
502
+ commit_template_dropdown = gr.Dropdown(
503
+ label="Select Commit Message Template",
504
+ choices=get_template_choices(),
505
+ value=get_template_choices()[0] if get_template_choices() else None,
506
+ interactive=True
507
+ )
508
+ refresh_commit_templates_button = gr.Button("Refresh Templates")
509
+
510
+ commit_subject_input = gr.Textbox(
511
+ label="Commit Subject (max 50 chars)",
512
+ placeholder="e.g., feat: Add conversation generation feature",
513
+ lines=1,
514
+ max_lines=1
515
+ )
516
+ commit_body_input = gr.Textbox(
517
+ label="Commit Body (optional)",
518
+ placeholder="Detailed description of changes. Use imperative mood.",
519
+ lines=5
520
+ )
521
+
522
+ generate_button = gr.Button("Generate & Push Conversations")
523
+
524
+ output_conversations = gr.JSON(label="Generated Conversations (Content of conversations.jsonl)")
525
+ output_log = gr.Textbox(label="Process Log", interactive=False, lines=10, max_lines=20)
526
+
527
+ # Link commit template dropdown to update fields
528
+ commit_template_dropdown.change(
529
+ fn=update_commit_fields,
530
+ inputs=commit_template_dropdown,
531
+ outputs=[commit_subject_input, commit_body_input]
532
+ )
533
+ # Initial load of commit fields based on default/first template
534
+ demo.load(
535
+ fn=lambda: update_commit_fields(get_template_choices()[0] if get_template_choices() else None),
536
+ inputs=None,
537
+ outputs=[commit_subject_input, commit_body_input]
538
+ )
539
+
540
+ generate_button.click(
541
+ fn=generate_and_display_conversations,
542
+ inputs=[
543
+ num_conversations_input,
544
+ custom_prompts_input,
545
+ custom_system_prompt_input,
546
+ commit_subject_input, # Pass commit subject
547
+ commit_body_input, # Pass commit body
548
+ model_selector_dropdown # Pass selected model name
549
+ ],
550
+ outputs=[output_conversations, output_log],
551
+ show_progress=True
552
+ )
553
+
554
        with gr.Tab("Community Prompts"):
            gr.Markdown("## Share Your Favorite Prompts with the Community!")
            gr.Markdown(
                "Submit cute and engaging system prompts and initial prompts here. "
                "These will be added to a shared list for others to see and use."
            )
            # Free-text inputs for a user-contributed system prompt and its
            # matching opener ([NAME] is substituted during generation elsewhere).
            community_system_prompt_input = gr.Textbox(
                label="Your System Prompt",
                placeholder="e.g., You are a tiny, cheerful squirrel, Squeaky, who loves nuts and collecting shiny things.",
                lines=3,
                interactive=True
            )
            community_initial_prompt_input = gr.Textbox(
                label="Your Initial Prompt (Use [NAME] for dynamic naming)",
                placeholder="e.g., Hey [NAME], what's your favorite type of acorn?",
                lines=2,
                interactive=True
            )
            submit_community_prompt_button = gr.Button("Submit Prompt to Community")
            community_submit_status = gr.Textbox(label="Submission Status", interactive=False)

            gr.Markdown("---")
            gr.Markdown("## Current Community Prompts")
            refresh_community_prompts_button = gr.Button("Refresh Community Prompts")
            community_prompts_display = gr.JSON(label="Submitted Community Prompts")

            # save_community_prompt (defined elsewhere in this file) is expected
            # to persist the pair and return (status message, updated prompt list).
            submit_community_prompt_button.click(
                fn=save_community_prompt,
                inputs=[community_system_prompt_input, community_initial_prompt_input],
                outputs=[community_submit_status, community_prompts_display],
                show_progress=True
            )

            # Initial load and refresh action for community prompts
            demo.load(refresh_community_prompts_display, inputs=None, outputs=community_prompts_display)
            refresh_community_prompts_button.click(refresh_community_prompts_display, inputs=None, outputs=community_prompts_display)
590
+
591
        with gr.Tab("Manage Commit Templates"):  # New Tab for Commit Templates
            gr.Markdown("## Manage Your Local Git Commit Message Templates")
            gr.Markdown(
                "Select an existing template to edit, or enter a new name to create a new one. "
                "These templates are saved locally in `generated/commits.json`."
            )

            # NOTE(review): get_template_choices() is called twice here; each call
            # presumably re-reads the templates file from disk -- confirm.
            commit_template_edit_dropdown = gr.Dropdown(
                label="Select Template to Edit/View",
                choices=get_template_choices(),
                value=get_template_choices()[0] if get_template_choices() else None,
                interactive=True
            )

            commit_template_name_input = gr.Textbox(
                label="Template Name (for saving new or editing existing)",
                placeholder="e.g., feat: Add New Feature Template"
            )
            commit_template_subject_input = gr.Textbox(
                label="Template Subject Line",
                placeholder="e.g., feat: "
            )
            commit_template_body_input = gr.Textbox(
                label="Template Body (optional)",
                placeholder="e.g., - Detailed description of the feature\n- Related issue: #XYZ",
                lines=5
            )

            save_template_button = gr.Button("Save/Update Template")
            template_status_output = gr.Textbox(label="Template Save Status", interactive=False)
            all_templates_display = gr.JSON(label="All Current Commit Templates")

            # Link dropdown to populate edit fields
            # NOTE(review): update_commit_fields(name) is evaluated twice per
            # change event (once per tuple element); a single call would suffice.
            commit_template_edit_dropdown.change(
                fn=lambda name: (name, update_commit_fields(name)[0], update_commit_fields(name)[1]),
                inputs=commit_template_edit_dropdown,
                outputs=[commit_template_name_input, commit_template_subject_input, commit_template_body_input]
            )

            # Action to save/update template
            save_template_button.click(
                fn=save_custom_commit_template,
                inputs=[commit_template_name_input, commit_template_subject_input, commit_template_body_input],
                outputs=[template_status_output, commit_template_edit_dropdown, all_templates_display]  # Update dropdown and JSON display
            )

            # Initial load of template management tab
            demo.load(
                fn=lambda: (
                    get_template_choices()[0] if get_template_choices() else None,  # initial dropdown value
                    get_template_choices()[0] if get_template_choices() else None,  # initial name input
                    update_commit_fields(get_template_choices()[0] if get_template_choices() else None)[0],  # initial subject
                    update_commit_fields(get_template_choices()[0] if get_template_choices() else None)[1],  # initial body
                    json.dumps(load_commit_templates(), indent=2)  # initial JSON display
                ),
                inputs=None,
                outputs=[
                    commit_template_edit_dropdown,
                    commit_template_name_input,
                    commit_template_subject_input,
                    commit_template_body_input,
                    all_templates_display
                ]
            )

            # Refresh button for the main commit templates dropdown in 'Generate Conversations' tab
            refresh_commit_templates_button.click(
                fn=refresh_commit_display,
                inputs=None,
                outputs=[commit_template_dropdown, all_templates_display]  # Refresh both dropdowns and the JSON display
            )
662
 
 
663
 
 
 
 
 
 
 
 
 
 
 
664
    gr.Markdown("---")
    gr.Markdown(
        "**Note on Push to Hub:** This Space is configured to automatically push generated data and "
        "community prompts to the Hugging Face dataset "
        f"`{HF_DATASET_REPO_ID}` using a Hugging Face token securely stored as a Space Secret (`HF_TOKEN`). "
        "User tokens are not required."
    )
    # NOTE(review): this timestamp (fixed UTC+7 offset) is computed once when the
    # Blocks layout is built at startup, not per page view -- it will go stale.
    current_datetime_vietnam = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=7))).strftime('%Y-%m-%d %H:%M:%S %Z%z')
    gr.Markdown(f"Current server time: {current_datetime_vietnam} (An Nhơn, Binh Dinh, Vietnam)")
673
 
674
 
675
# Launch the Gradio app
if __name__ == "__main__":
    # Make sure the local cache directory exists and the default commit
    # templates file is present before the UI starts serving requests.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    load_commit_templates()  # creates the templates file with defaults if it doesn't exist
    demo.launch(debug=True, share=False)