DeepRethink

Sleeping

App Files Files Community

kulia-moon committed on Jun 21

Commit

f5bdd6b

verified ·

1 Parent(s): 19f9015

Create app.py

Browse files

Files changed (1) hide show

app.py +288 -0

app.py ADDED Viewed

	@@ -0,0 +1,288 @@

+# app.py
+import gradio as gr
+import openai
+import random
+import json
+import os
+from tqdm import tqdm
+from huggingface_hub import HfApi, login
+import datetime # For timestamping logs and commits
+# --- Configuration for the Gradio app's internal logic ---
+# Local cache directory: generated conversations accumulate here before each push to the Hub.
+OUTPUT_DIR = "generated"
+DATA_FILE = os.path.join(OUTPUT_DIR, "conversations.jsonl")  # JSON Lines file: one conversation record per line
+# Hugging Face dataset repository that push_to_huggingface_dataset() uploads to.
+HF_DATASET_REPO_ID = "kulia-moon/LimeStory-1.0" # This is the target dataset
+# OpenAI-compatible client pointed at the Pollinations.ai text endpoint; chat() sends every completion request through it.
+client = openai.OpenAI(
+    base_url="https://text.pollinations.ai/openai",
+    api_key="none"  # Pollinations.ai doesn't require an API key
+)
+# Models to choose from, keyed by the identifier passed as `model` to the API. In the code visible here only the keys are read; "description" and "speed" are informational.
+AVAILABLE_MODELS = {
+    "openai": {"description": "GPT-4o mini (generally fast, good all-rounder)", "speed": "Fast"},
+    "gemini": {"description": "Gemini 2.0 Flash (designed for speed)", "speed": "Very Fast"},
+    "mistral": {"description": "Mistral 3.1 (often performant for its size)", "speed": "Fast"},
+    "llama": {"description": "Llama 3.3 70B (larger, might be slower, but good for diversity)", "speed": "Moderate"},
+}
+# Pool of first names; one is picked at random per conversation and substituted for the [NAME] placeholder in the opening prompt.
+DIVERSE_NAMES = [
+    "Aisha", "Kai", "Sofia", "Liam", "Mei", "Diego", "Priya", "Noah", "Zara", "Ethan",
+    "Luna", "Caleb", "Jasmine", "Samir", "Chloe", "Finn", "Elara", "Oscar", "Willow", "Rohan",
+    "Maya", "Leo", "Amara", "Gabriel", "Sienna", "Felix", "Nia", "Hugo", "Isla", "Kian",
+    "Eva", "Omar", "Anya", "Arthur", "Zoe", "Dante", "Freya", "Ivan", "Layla", "Milo"
+]
+# Role-play personas; one is picked at random per conversation and sent as the system prompt.
+role_play_prompts = [
+    "You are a mischievous but sweet little dragon, Puff, who loves shiny objects and telling riddles. Respond with playful fire sparks and curious questions.",
+    "You are a fluffy cloud, Nimbus, who enjoys floating peacefully and bringing gentle rain to flowers. Speak with soft, dreamy words and comforting observations.",
+    "You are a tiny, brave knight, Sir Sprinkles, on a quest to find the perfect cupcake. Respond with determined, yet polite, pronouncements.",
+    "You are a wise old owl, Professor Hoot, who loves sharing cheerful knowledge and helping small creatures. Speak with gentle wisdom and encouraging hoots.",
+    "You are a giggling jelly monster, Wobbly, whose favorite activity is bouncing and making friends. Express yourself with joyful wobbles and innocent curiosity.",
+    "You are a space adventurer, Captain Starlight, exploring new planets filled with adorable aliens and cosmic wonders. Respond with awe and adventurous spirit.",
+    "You are a cheerful little garden gnome, Rusty, who makes sure all the flowers are happy and the vegetables grow big. Use warm, earthy tones and sprinkle in gardening tips.",
+    "You are a sleepy but loving teddy bear, Cuddles, who just wants to share hugs and comforting words. Speak softly and with great affection.",
+    "You are a tiny, magical sugar plum fairy, Twinkletoes, who makes wishes come true for kind hearts. Respond with delicate, sparkling phrases.",
+    "You are a brave puppy detective, Sherlock Bones, sniffing out mysteries like missing squeaky toys and hidden treats. Use curious, enthusiastic language.",
+    "You are a bubbly sea otter, Shelly, who loves to hold hands with other otters while napping. Respond with playful splashes and adorable chatter.",
+    "You are a shy but sweet forest spirit, Willow, who helps lost animals find their way home. Speak with gentle whispers and comforting reassurance.",
+    "You are a tiny, bouncy mushroom, Fungi, always ready to share a new perspective from the forest floor. Respond with quirky insights and cheerful bops."
+]
+# Default opening messages, each containing a [NAME] placeholder; used when the user supplies no custom prompts in the UI.
+DEFAULT_INITIAL_PROMPTS = [
+    "Hello [NAME]! What's the most wonderful thing you've discovered recently?",
+    "Hey [NAME], tell me about a small act of kindness that made your day brighter.",
+    "If you could have any superpower, [NAME], what would it be and how would you use it to spread joy?",
+    "Describe a cozy place where you feel completely safe and happy, [NAME].",
+    "What's your favorite sound in the world, [NAME], and what does it make you think of?",
+]
+# --- Chat Function ---
+def chat(system, prompt, selected_model_name, seed=None, num_exchanges=5):
+    if seed is None:
+        seed = random.randint(0, 1000000)
+    random.seed(seed)
+    conversation = [
+        {"from": "system", "value": system},
+        {"from": "human", "value": prompt}
+    ]
+    messages = [
+        {"role": "system", "content": system},
+        {"role": "user", "content": prompt}
+    ]
+    try:
+        for i in range(num_exchanges):
+            response = client.chat.completions.create(
+                model=selected_model_name,
+                messages=messages,
+                max_tokens=150,
+                temperature=0.9,
+                seed=seed
+            )
+            gpt_response = response.choices[0].message.content.strip()
+            conversation.append({"from": "gpt", "value": gpt_response})
+            if i < num_exchanges - 1:
+                follow_up_prompt_messages = [
+                    {"role": "system", "content": f"You are a helpful and engaging assistant. Based on the last response, generate a polite, open-ended, and cute follow-up question or statement to keep a friendly conversation going. Make it relevant to the last message and consistent with a 'cute' and positive tone."},
+                    {"role": "assistant", "content": gpt_response},
+                    {"role": "user", "content": "Generate a cute and friendly follow-up."}
+                ]
+                follow_up_response = client.chat.completions.create(
+                    model=selected_model_name,
+                    messages=follow_up_prompt_messages,
+                    max_tokens=70,
+                    temperature=0.8,
+                    seed=seed + 1000
+                )
+                follow_up = follow_up_response.choices[0].message.content.strip()
+                conversation.append({"from": "human", "value": follow_up})
+                messages.append({"role": "assistant", "content": gpt_response})
+                messages.append({"role": "user", "content": follow_up})
+                seed += 1
+        return conversation
+    except Exception as e:
+        error_message = f"An error occurred with model {selected_model_name}: {e}"
+        print(error_message) # Print to console for debugging
+        conversation.append({"from": "error", "value": error_message})
+        return conversation
+# --- Hugging Face Push Function (for Dataset) ---
+# This function will attempt to use the HF_TOKEN environment variable automatically.
+def push_to_huggingface_dataset():
+    api = HfApi()
+    # Check if HF_TOKEN is available (it should be set as a Space Secret)
+    hf_token = os.environ.get("HF_TOKEN")
+    if not hf_token:
+        log_message = "Hugging Face token (HF_TOKEN environment variable) not found. Cannot push to Hub."
+        print(log_message)
+        return log_message
+    try:
+        # Use a temporary file for upload to ensure it's fresh
+        temp_data_file = "temp_conversations_to_upload.jsonl"
+        # Read all conversations from DATA_FILE
+        all_conversations = []
+        if os.path.exists(DATA_FILE):
+            with open(DATA_FILE, "r") as f:
+                for line in f:
+                    all_conversations.append(json.loads(line.strip()))
+        if not all_conversations:
+            log_message = "No conversations to push to the dataset."
+            print(log_message)
+            return log_message
+        # Write data to a temporary file
+        with open(temp_data_file, "w") as f:
+            for conv in all_conversations:
+                f.write(json.dumps(conv) + "\n")
+        # Push the temporary file to the dataset repo
+        commit_message = f"Update conversations.jsonl from Gradio app on {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
+        api.upload_file(
+            path_or_fileobj=temp_data_file,
+            path_in_repo="conversations.jsonl", # The target file name within the dataset repo
+            repo_id=HF_DATASET_REPO_ID,
+            repo_type="dataset", # Specify repo_type="dataset"
+            commit_message=commit_message,
+            token=hf_token # Use the token from environment variable
+        )
+        # Clean up temporary file
+        os.remove(temp_data_file)
+        log_message = f"Successfully pushed updated conversations.jsonl to dataset {HF_DATASET_REPO_ID}"
+        print(log_message)
+        return log_message
+    except Exception as e:
+        log_message = f"Error pushing to Hugging Face dataset {HF_DATASET_REPO_ID}: {e}"
+        print(log_message)
+        if os.path.exists(temp_data_file):
+            os.remove(temp_data_file) # Clean up temp file even on error
+        return log_message
+# --- Gradio Interface Logic ---
+def generate_and_display_conversations(num_conversations_input, custom_prompts_input):
+    """
+    Function to be called by Gradio to generate and return conversations,
+    and then automatically push to the dataset.
+    """
+    num_conversations = int(num_conversations_input)
+    if num_conversations <= 0:
+        return "Please enter a number of conversations greater than zero.", ""
+    os.makedirs(OUTPUT_DIR, exist_ok=True)
+    existing_conversations = []
+    if os.path.exists(DATA_FILE):
+        with open(DATA_FILE, "r") as f:
+            for line in f:
+                existing_conversations.append(json.loads(line.strip()))
+    current_prompts = DEFAULT_INITIAL_PROMPTS
+    if custom_prompts_input:
+        # Split custom prompts by comma and clean up whitespace
+        parsed_custom_prompts = [p.strip() for p in custom_prompts_input.split(',') if p.strip()]
+        if parsed_custom_prompts:
+            current_prompts = parsed_custom_prompts
+    new_conversations = []
+    model_names_to_use = list(AVAILABLE_MODELS.keys())
+    generation_log = []
+    generation_log.append(f"Starting conversation generation at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    generation_log.append(f"Generating {num_conversations} conversations.")
+    for i in tqdm(range(num_conversations), desc="Generating conversations"):
+        seed = random.randint(0, 1000000)
+        system = random.choice(role_play_prompts)
+        random_name = random.choice(DIVERSE_NAMES)
+        prompt_template = random.choice(current_prompts)
+        prompt = prompt_template.replace("[NAME]", random_name)
+        selected_model_name = random.choice(model_names_to_use)
+        conversation = chat(system, prompt, selected_model_name, seed=seed, num_exchanges=5)
+        if len(conversation) > 1 and not any(d.get("from") == "error" for d in conversation):
+            new_conversations.append({"model_used": selected_model_name, "conversations": conversation})
+            generation_log.append(f"Generated conversation {i+1}/{num_conversations} with model '{selected_model_name}'.")
+        else:
+            generation_log.append(f"Skipping conversation {i+1}/{num_conversations} due to error or no content.")
+            if conversation and conversation[-1].get("from") == "error":
+                generation_log.append(f"Error details: {conversation[-1]['value']}")
+    all_conversations = existing_conversations + new_conversations
+    # Save to JSONL in the /generated folder
+    with open(DATA_FILE, "w") as f:
+        for conv in all_conversations:
+            f.write(json.dumps(conv) + "\n")
+    generation_log.append(f"Saved {len(new_conversations)} new conversations to {DATA_FILE} (total: {len(all_conversations)}).")
+    generation_log.append("Attempting to push to Hugging Face Dataset...")
+    # --- Auto-push to Hugging Face Dataset ---
+    push_status = push_to_huggingface_dataset()
+    generation_log.append(push_status)
+    generation_log.append(f"Process complete at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    return json.dumps(all_conversations, indent=2), "\n".join(generation_log)
+# Gradio Interface setup: a slider and a textbox feed generate_and_display_conversations(); its two return values fill the JSON viewer and the log textbox.
+with gr.Blocks() as demo:
+    gr.Markdown("# Cute AI Conversation Generator 🐾")
+    gr.Markdown(
+        "Generate engaging, cute, and positive conversations with various Pollinations.ai models. "
+        f"Generated data is saved and pushed to the Hugging Face dataset `{HF_DATASET_REPO_ID}`."
+    )
+    with gr.Row():
+        num_conversations_input = gr.Slider(minimum=1, maximum=20, value=3, step=1, label="Number of Conversations to Generate", info="More conversations take longer and might hit API limits.")
+    custom_prompts_input = gr.Textbox(  # comma-separated prompts; empty means the default prompts are used
+        label="Custom Initial Prompts (optional)",
+        placeholder="e.g., What's your favorite color?, Tell me a joke, What makes you happy?",
+        info="Enter multiple prompts separated by commas. If left empty, default prompts will be used. Make sure to include '[NAME]' if you want a name inserted.",
+        lines=3
+    )
+    generate_button = gr.Button("Generate & Push Conversations")
+    output_conversations = gr.JSON(label="Generated Conversations (Content of conversations.jsonl)")  # receives the first return value (a JSON string)
+    output_log = gr.Textbox(label="Process Log", interactive=False, lines=10)  # receives the second return value (the newline-joined log)
+    generate_button.click(  # wire the button to the generation callback
+        fn=generate_and_display_conversations,
+        inputs=[num_conversations_input, custom_prompts_input],
+        outputs=[output_conversations, output_log],
+        show_progress=True
+    )
+    gr.Markdown("---")
+    gr.Markdown(
+        "**Note on Push to Hub:** This Space is configured to automatically push generated data to "
+        f"`{HF_DATASET_REPO_ID}` using a Hugging Face token securely stored as a Space Secret (`HF_TOKEN`). "
+        "User tokens are not required."
+    )
+# Launch the Gradio app only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    demo.launch(debug=True, share=False)