kulia-moon committed on
Commit
f5bdd6b
·
verified ·
1 Parent(s): 19f9015

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +288 -0
app.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import gradio as gr
3
+ import openai
4
+ import random
5
+ import json
6
+ import os
7
+ from tqdm import tqdm
8
+ from huggingface_hub import HfApi, login
9
+ import datetime # For timestamping logs and commits
10
+
11
# --- Configuration for the Gradio app's internal logic ---
# Generated conversations are accumulated in this local cache directory first.
OUTPUT_DIR = "generated"
DATA_FILE = os.path.join(OUTPUT_DIR, "conversations.jsonl")

# Hugging Face dataset repository that the accumulated data is pushed to.
HF_DATASET_REPO_ID = "kulia-moon/LimeStory-1.0"

# OpenAI-compatible client pointed at the Pollinations.ai endpoint.
# A placeholder key is supplied because Pollinations.ai doesn't require one.
client = openai.OpenAI(
    api_key="none",
    base_url="https://text.pollinations.ai/openai",
)
24
+
25
# Models offered for generation (fast ones listed first). The keys are the
# model names sent to the API; the values are descriptive metadata only.
AVAILABLE_MODELS = {
    "openai": {
        "description": "GPT-4o mini (generally fast, good all-rounder)",
        "speed": "Fast",
    },
    "gemini": {
        "description": "Gemini 2.0 Flash (designed for speed)",
        "speed": "Very Fast",
    },
    "mistral": {
        "description": "Mistral 3.1 (often performant for its size)",
        "speed": "Fast",
    },
    "llama": {
        "description": "Llama 3.3 70B (larger, might be slower, but good for diversity)",
        "speed": "Moderate",
    },
}

# Pool of first names substituted for the "[NAME]" placeholder in prompts.
DIVERSE_NAMES = [
    "Aisha", "Kai", "Sofia", "Liam", "Mei",
    "Diego", "Priya", "Noah", "Zara", "Ethan",
    "Luna", "Caleb", "Jasmine", "Samir", "Chloe",
    "Finn", "Elara", "Oscar", "Willow", "Rohan",
    "Maya", "Leo", "Amara", "Gabriel", "Sienna",
    "Felix", "Nia", "Hugo", "Isla", "Kian",
    "Eva", "Omar", "Anya", "Arthur", "Zoe",
    "Dante", "Freya", "Ivan", "Layla", "Milo",
]

# System prompts, one per role-play persona; one is picked per conversation.
role_play_prompts = [
    "You are a mischievous but sweet little dragon, Puff, who loves shiny objects and telling riddles. Respond with playful fire sparks and curious questions.",
    "You are a fluffy cloud, Nimbus, who enjoys floating peacefully and bringing gentle rain to flowers. Speak with soft, dreamy words and comforting observations.",
    "You are a tiny, brave knight, Sir Sprinkles, on a quest to find the perfect cupcake. Respond with determined, yet polite, pronouncements.",
    "You are a wise old owl, Professor Hoot, who loves sharing cheerful knowledge and helping small creatures. Speak with gentle wisdom and encouraging hoots.",
    "You are a giggling jelly monster, Wobbly, whose favorite activity is bouncing and making friends. Express yourself with joyful wobbles and innocent curiosity.",
    "You are a space adventurer, Captain Starlight, exploring new planets filled with adorable aliens and cosmic wonders. Respond with awe and adventurous spirit.",
    "You are a cheerful little garden gnome, Rusty, who makes sure all the flowers are happy and the vegetables grow big. Use warm, earthy tones and sprinkle in gardening tips.",
    "You are a sleepy but loving teddy bear, Cuddles, who just wants to share hugs and comforting words. Speak softly and with great affection.",
    "You are a tiny, magical sugar plum fairy, Twinkletoes, who makes wishes come true for kind hearts. Respond with delicate, sparkling phrases.",
    "You are a brave puppy detective, Sherlock Bones, sniffing out mysteries like missing squeaky toys and hidden treats. Use curious, enthusiastic language.",
    "You are a bubbly sea otter, Shelly, who loves to hold hands with other otters while napping. Respond with playful splashes and adorable chatter.",
    "You are a shy but sweet forest spirit, Willow, who helps lost animals find their way home. Speak with gentle whispers and comforting reassurance.",
    "You are a tiny, bouncy mushroom, Fungi, always ready to share a new perspective from the forest floor. Respond with quirky insights and cheerful bops.",
]

# Default opening lines for the "human" side. "[NAME]" is replaced with a
# name from DIVERSE_NAMES; user-supplied prompts can override this list.
DEFAULT_INITIAL_PROMPTS = [
    "Hello [NAME]! What's the most wonderful thing you've discovered recently?",
    "Hey [NAME], tell me about a small act of kindness that made your day brighter.",
    "If you could have any superpower, [NAME], what would it be and how would you use it to spread joy?",
    "Describe a cozy place where you feel completely safe and happy, [NAME].",
    "What's your favorite sound in the world, [NAME], and what does it make you think of?",
]
66
+
67
+ # --- Chat Function ---
68
def chat(system, prompt, selected_model_name, seed=None, num_exchanges=5):
    """Generate one multi-turn conversation with the selected model.

    Each exchange asks the model (under the ``system`` persona) for a reply.
    For every exchange except the last, the same model is then asked, under a
    separate helper prompt, to write the next "human" turn.

    Args:
        system: System prompt describing the persona to play.
        prompt: Opening "human" message.
        selected_model_name: Model name passed to the completions API.
        seed: Seed forwarded to the API calls. A random one is drawn when
            ``None``. It is incremented by one after each exchange, and the
            follow-up request uses ``seed + 1000``.
        num_exchanges: Number of model replies to request.

    Returns:
        A list of ``{"from": ..., "value": ...}`` dicts with ``from`` set to
        ``"system"``, ``"human"`` or ``"gpt"``. If any request fails, the
        turns collected so far are returned with a final ``"error"`` entry
        describing the failure.
    """
    if seed is None:
        seed = random.randint(0, 1000000)
    # The process-wide ``random`` generator is intentionally NOT reseeded
    # here: nothing below draws random numbers, and reseeding it would make
    # every caller's subsequent "random" choices a deterministic function of
    # ``seed``.

    conversation = [
        {"from": "system", "value": system},
        {"from": "human", "value": prompt},
    ]
    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": prompt},
    ]

    try:
        for i in range(num_exchanges):
            response = client.chat.completions.create(
                model=selected_model_name,
                messages=messages,
                max_tokens=150,
                temperature=0.9,
                seed=seed,
            )
            content = response.choices[0].message.content
            if content is None:
                # Report a readable reason instead of an AttributeError.
                raise ValueError("model returned a message without text content")
            gpt_response = content.strip()

            conversation.append({"from": "gpt", "value": gpt_response})

            # No follow-up is needed after the final reply.
            if i < num_exchanges - 1:
                follow_up_prompt_messages = [
                    {"role": "system", "content": "You are a helpful and engaging assistant. Based on the last response, generate a polite, open-ended, and cute follow-up question or statement to keep a friendly conversation going. Make it relevant to the last message and consistent with a 'cute' and positive tone."},
                    {"role": "assistant", "content": gpt_response},
                    {"role": "user", "content": "Generate a cute and friendly follow-up."},
                ]

                follow_up_response = client.chat.completions.create(
                    model=selected_model_name,
                    messages=follow_up_prompt_messages,
                    max_tokens=70,
                    temperature=0.8,
                    seed=seed + 1000,
                )
                follow_up_content = follow_up_response.choices[0].message.content
                if follow_up_content is None:
                    raise ValueError("model returned a follow-up without text content")
                follow_up = follow_up_content.strip()

                conversation.append({"from": "human", "value": follow_up})

                # Extend the running history so the next reply sees the
                # whole conversation so far.
                messages.append({"role": "assistant", "content": gpt_response})
                messages.append({"role": "user", "content": follow_up})
                seed += 1

        return conversation
    except Exception as e:
        # Best-effort boundary: report the failure inside the returned
        # conversation so the caller can detect and skip it.
        error_message = f"An error occurred with model {selected_model_name}: {e}"
        print(error_message)  # Print to console for debugging
        conversation.append({"from": "error", "value": error_message})
        return conversation
123
+
124
+ # --- Hugging Face Push Function (for Dataset) ---
125
+ # This function will attempt to use the HF_TOKEN environment variable automatically.
126
def push_to_huggingface_dataset():
    """Upload the locally accumulated conversations to the dataset repo.

    Reads every conversation from ``DATA_FILE``, writes them to a private
    temporary snapshot, and uploads that snapshot as ``conversations.jsonl``
    to ``HF_DATASET_REPO_ID`` using the token in the ``HF_TOKEN`` environment
    variable.

    Returns:
        A human-readable status message (also printed): success, "nothing to
        push", missing token, or the error that occurred. Errors are reported
        in the message rather than raised.
    """
    import tempfile  # Local import: only this function needs it.

    # Check if HF_TOKEN is available (it should be set as a Space Secret)
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        log_message = "Hugging Face token (HF_TOKEN environment variable) not found. Cannot push to Hub."
        print(log_message)
        return log_message

    temp_data_file = None
    try:
        # Read all conversations from DATA_FILE, ignoring blank lines so a
        # stray empty line does not abort the push.
        all_conversations = []
        if os.path.exists(DATA_FILE):
            with open(DATA_FILE, "r", encoding="utf-8") as f:
                all_conversations = [json.loads(line) for line in f if line.strip()]

        if not all_conversations:
            log_message = "No conversations to push to the dataset."
            print(log_message)
            return log_message

        # Snapshot the data into a uniquely named temporary file so that
        # overlapping invocations never overwrite or delete each other's
        # upload source.
        with tempfile.NamedTemporaryFile(
            "w", suffix=".jsonl", delete=False, encoding="utf-8"
        ) as tmp:
            temp_data_file = tmp.name
            tmp.writelines(json.dumps(conv) + "\n" for conv in all_conversations)

        # Push the temporary file to the dataset repo
        commit_message = f"Update conversations.jsonl from Gradio app on {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        api = HfApi()
        api.upload_file(
            path_or_fileobj=temp_data_file,
            path_in_repo="conversations.jsonl",  # The target file name within the dataset repo
            repo_id=HF_DATASET_REPO_ID,
            repo_type="dataset",
            commit_message=commit_message,
            token=hf_token,  # Use the token from environment variable
        )

        log_message = f"Successfully pushed updated conversations.jsonl to dataset {HF_DATASET_REPO_ID}"
        print(log_message)
        return log_message
    except Exception as e:
        log_message = f"Error pushing to Hugging Face dataset {HF_DATASET_REPO_ID}: {e}"
        print(log_message)
        return log_message
    finally:
        # Clean up the snapshot on every exit path (success, early return, error).
        if temp_data_file is not None and os.path.exists(temp_data_file):
            os.remove(temp_data_file)
179
+
180
+ # --- Gradio Interface Logic ---
181
+
182
def generate_and_display_conversations(num_conversations_input, custom_prompts_input):
    """Generate conversations, persist them, and push them to the dataset.

    Called by the Gradio button. New conversations are appended to those
    already stored in ``DATA_FILE``, the combined list is written back, and
    ``push_to_huggingface_dataset`` is then invoked.

    Args:
        num_conversations_input: Number of conversations to generate
            (converted with ``int``).
        custom_prompts_input: Optional comma-separated initial prompts. When
            empty, ``DEFAULT_INITIAL_PROMPTS`` is used. ``[NAME]`` in a
            prompt is replaced with a random name.

    Returns:
        A 2-tuple ``(conversations, log)``: the JSON text of all stored
        conversations and the newline-joined process log. For a non-positive
        count, ``conversations`` is ``None`` and ``log`` holds the
        validation message.
    """
    num_conversations = int(num_conversations_input)
    if num_conversations <= 0:
        # The message goes in the log slot: the first slot feeds a JSON
        # output, and a plain sentence is not valid JSON.
        return None, "Please enter a number of conversations greater than zero."

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Load what is already stored, ignoring blank lines so a stray empty
    # line does not abort the whole run.
    existing_conversations = []
    if os.path.exists(DATA_FILE):
        with open(DATA_FILE, "r", encoding="utf-8") as f:
            existing_conversations = [json.loads(line) for line in f if line.strip()]

    current_prompts = DEFAULT_INITIAL_PROMPTS
    if custom_prompts_input:
        # Split custom prompts by comma and clean up whitespace
        parsed_custom_prompts = [p.strip() for p in custom_prompts_input.split(',') if p.strip()]
        if parsed_custom_prompts:
            current_prompts = parsed_custom_prompts

    new_conversations = []
    model_names_to_use = list(AVAILABLE_MODELS.keys())

    # Private generator: the picks below must not depend on the state of the
    # process-wide ``random`` module, which other code may reseed.
    rng = random.Random()

    generation_log = []
    generation_log.append(f"Starting conversation generation at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    generation_log.append(f"Generating {num_conversations} conversations.")

    for i in tqdm(range(num_conversations), desc="Generating conversations"):
        seed = rng.randint(0, 1000000)
        system = rng.choice(role_play_prompts)

        random_name = rng.choice(DIVERSE_NAMES)
        prompt_template = rng.choice(current_prompts)
        prompt = prompt_template.replace("[NAME]", random_name)

        selected_model_name = rng.choice(model_names_to_use)

        conversation = chat(system, prompt, selected_model_name, seed=seed, num_exchanges=5)
        # Keep only conversations that completed without an "error" entry.
        if len(conversation) > 1 and not any(d.get("from") == "error" for d in conversation):
            new_conversations.append({"model_used": selected_model_name, "conversations": conversation})
            generation_log.append(f"Generated conversation {i+1}/{num_conversations} with model '{selected_model_name}'.")
        else:
            generation_log.append(f"Skipping conversation {i+1}/{num_conversations} due to error or no content.")
            if conversation and conversation[-1].get("from") == "error":
                generation_log.append(f"Error details: {conversation[-1]['value']}")

    all_conversations = existing_conversations + new_conversations

    # Save to JSONL in the /generated folder
    with open(DATA_FILE, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(conv) + "\n" for conv in all_conversations)

    generation_log.append(f"Saved {len(new_conversations)} new conversations to {DATA_FILE} (total: {len(all_conversations)}).")
    generation_log.append("Attempting to push to Hugging Face Dataset...")

    # --- Auto-push to Hugging Face Dataset ---
    push_status = push_to_huggingface_dataset()
    generation_log.append(push_status)
    generation_log.append(f"Process complete at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    return json.dumps(all_conversations, indent=2), "\n".join(generation_log)
248
+
249
# Gradio interface: inputs, a trigger button, and two outputs (data + log).
with gr.Blocks() as demo:
    gr.Markdown("# Cute AI Conversation Generator 🐾")
    gr.Markdown(
        "Generate engaging, cute, and positive conversations with various Pollinations.ai models. "
        f"Generated data is saved and pushed to the Hugging Face dataset `{HF_DATASET_REPO_ID}`."
    )

    # --- Inputs ---
    with gr.Row():
        num_conversations_input = gr.Slider(
            minimum=1,
            maximum=20,
            value=3,
            step=1,
            label="Number of Conversations to Generate",
            info="More conversations take longer and might hit API limits.",
        )

    custom_prompts_input = gr.Textbox(
        label="Custom Initial Prompts (optional)",
        placeholder="e.g., What's your favorite color?, Tell me a joke, What makes you happy?",
        info="Enter multiple prompts separated by commas. If left empty, default prompts will be used. Make sure to include '[NAME]' if you want a name inserted.",
        lines=3,
    )

    generate_button = gr.Button("Generate & Push Conversations")

    # --- Outputs ---
    output_conversations = gr.JSON(label="Generated Conversations (Content of conversations.jsonl)")
    output_log = gr.Textbox(label="Process Log", interactive=False, lines=10)

    # Clicking the button runs generation and fills both outputs.
    generate_button.click(
        fn=generate_and_display_conversations,
        inputs=[num_conversations_input, custom_prompts_input],
        outputs=[output_conversations, output_log],
        show_progress=True,
    )

    gr.Markdown("---")
    gr.Markdown(
        "**Note on Push to Hub:** This Space is configured to automatically push generated data to "
        f"`{HF_DATASET_REPO_ID}` using a Hugging Face token securely stored as a Space Secret (`HF_TOKEN`). "
        "User tokens are not required."
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)