Spaces:
Sleeping
Sleeping
| # app.py | |
| import gradio as gr | |
| import openai | |
| import random | |
| import json | |
| import os | |
| from tqdm import tqdm | |
| from huggingface_hub import HfApi, login | |
| import datetime | |
# --- Configuration for the Gradio app's internal logic ---
# Local cache directory (data is accumulated here first, then pushed to the Hub)
OUTPUT_DIR = "generated"
DATA_FILE = os.path.join(OUTPUT_DIR, "conversations.jsonl")
COMMUNITY_PROMPTS_FILE = os.path.join(OUTPUT_DIR, "community_prompts.jsonl")
COMMIT_TEMPLATES_FILE = os.path.join(OUTPUT_DIR, "commits.json")  # Commit message templates

# Hugging Face Dataset repository to push to
HF_DATASET_REPO_ID = "kulia-moon/DeepRethink"  # Target dataset for conversations
HF_COMMUNITY_PROMPT_FILE_IN_REPO = "community_prompts.jsonl"  # File name inside the dataset repo for community prompts

# Configure the OpenAI-compatible client for Pollinations.ai.
# The endpoint does not require an API key, but the openai client raises at
# construction time when it is given no key at all, so fall back to a
# placeholder when the optional TOKEN environment variable is unset or empty.
client = openai.OpenAI(
    base_url="https://text.pollinations.ai/openai",
    api_key=os.environ.get("TOKEN") or "not-needed",
)
# Define ALL available models from https://text.pollinations.ai/models
# Maps the model identifier sent to the API to display metadata: a
# human-readable "description" and a rough "speed" label. The keys are also
# the dropdown choices and the pool used for random model selection.
# NOTE(review): the "openai-fast" entry is described as Gemma 7B — confirm the
# descriptions against the live models endpoint.
AVAILABLE_MODELS = {
    "openai": {"description": "GPT-4o mini (generally fast, good all-rounder)", "speed": "Fast"},
    "gemini": {"description": "Gemini 2.0 Flash (designed for speed)", "speed": "Very Fast"},
    "mistral": {"description": "Mistral 3.1 (often performant for its size)", "speed": "Fast"},
    "llama": {"description": "Llama 3.3 70B (larger, good for diversity)", "speed": "Moderate"},
    "claude": {"description": "Claude 3.5 Haiku (via Pollinations gateway, good for chat)", "speed": "Moderate"},
    "qwen-coder": {"description": "Qwen 2.5 Coder 32B (coder-focused, general chat is okay)", "speed": "Moderate"},
    "openai-fast": {"description": "Gemma 7B (Google's open model, good generalist)", "speed": "Moderate"},
    "dbrx": {"description": "DBRX (Databricks's large open model, might be slower)", "speed": "Slow"},
    "mixtral": {"description": "Mixtral 8x7B (Mixture of Experts, good balance of speed/quality)", "speed": "Fast/Moderate"},
    "command-r": {"description": "Command R (Cohere's powerful model)", "speed": "Moderate"},
    "cohere-chat": {"description": "Cohere's general chat model", "speed": "Moderate"},
    "pplx-7b": {"description": "Perplexity Llama 2 7B (fast, good code/text)", "speed": "Fast"},
    "pplx-70b": {"description": "Perplexity Llama 2 70B (larger, more capable Perplexity model)", "speed": "Moderate"},
    "yi-34b": {"description": "Yi 34B (zero-one.ai model, capable generalist)", "speed": "Moderate"},
    "grok": {"description": "Grok (X.ai's model, may have specific tone/style)", "speed": "Moderate"},
    "stable-lm": {"description": "Stable LM (Stability AI's model)", "speed": "Fast"},
    "nous-hermes": {"description": "Nous Hermes (fine-tune of Mistral)", "speed": "Fast"},
    "openchat": {"description": "OpenChat 3.5 (fine-tune of Mistral)", "speed": "Fast"},
}
# Formatted timestamp in a fixed UTC+7 offset. It is evaluated once, when this
# module is imported, and is not refreshed afterwards; it is later embedded in
# the uploaded conversations file name.
current_datetime_vietnam = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=7))).strftime('%Y-%m-%d %H:%M:%S %Z%z')
# Diverse Names Dataset
# One name is drawn at random per conversation and substituted for the
# "[NAME]" placeholder in the chosen initial prompt.
DIVERSE_NAMES = [
    "Aisha", "Kai", "Sofia", "Liam", "Mei", "Diego", "Priya", "Noah", "Zara", "Ethan",
    "Luna", "Caleb", "Jasmine", "Samir", "Chloe", "Finn", "Elara", "Oscar", "Willow", "Rohan",
    "Maya", "Leo", "Amara", "Gabriel", "Sienna", "Felix", "Nia", "Hugo", "Isla", "Kian",
    "Eva", "Omar", "Anya", "Arthur", "Zoe", "Dante", "Freya", "Ivan", "Layla", "Milo"
]
# Role-playing system prompts (defaults if user doesn't provide one)
# One entry is picked at random per conversation when no custom system prompt
# is supplied. The list currently holds a single prompt, which instructs the
# model to emit its reasoning inside <think>...</think> before the answer.
role_play_prompts = [
    """The conversation between User and Assistant. The user asks a question, and the Assistant solves it.
The Assistant **must** simulate a **deep, self-questioning thought process** before answering. Follow these steps:
1. **Break Down the Problem**: Split the question into sub-components.
2. **Explore Hypotheses**: Propose 3-4 approaches to solve it, including flawed ones.
3. **Validate Each Step**: Check assumptions, verify calculations, and test logic.
4. **Self-Correct**: If an error is found, explain how to fix it.
5. **Synthesize**: Combine valid insights into a conclusion.
The Assistant’s reasoning **must** mimic a **natural internal monologue**, including:
- Doubts ("Wait, does this assumption hold?"),
- References to concepts or analogies ("This reminds me of..."),
- Counterfactuals ("What if X were different?").
**Critical Instructions**:
- Use natural self-dialogue: doubts ("Is this assumption valid?"), analogies ("This works like..."), and counterfactuals ("If X were false...").
- **If uncertain, admit it in the answer** (e.g., "Based on public data up to 2023...", "I might be missing...").
- **Never state unverified claims as facts**.
- **Recommend verification** for critical details (e.g., "Check the company’s investor relations page for updates").
Format the response as:
<think>
[Detailed internal dialogue, in a narrative and flowing format, such as:
"First, I need to understand... So, the main objective is...
Hmm, maybe I should consider...
Then, I need to ...
I should improve ...
In addition to this, ...
In addition, the user wants to ...
Testing Hypothesis A: [explanation].
Oh, that doesn't work because [error]. I'll try Hypothesis B...
Confirming with an example: [specific case].
Based on the hypotheses I believe that...
The most likely is...
Finally, [summary]."]
</think>
Clear and direct answer, derived of the above reasoning.
If the user want analysis of marketing:
Analyze competitors in [Industry/Niche]:
- Primary competitors: [List 3-5 main competitors]
- Geographic focus: [Specify market region]
- Time period: [Define analysis timeframe]
- Key metrics: [List specific performance indicators]
Required outputs:
1. Competitive positioning analysis
2. Strengths/weaknesses assessment
3. Market opportunity identification
4. Strategic recommendations
5. Implementation timeline with KPIs
Please build a prompt using the following guidelines:
Define the Objective:
- Clearly state the main research question or task.
- Specify the desired outcome (e.g., detailed analysis, comparison, recommendations).
Gather Context and Background:
- Include all relevant background information, definitions, and data.
- Specify any boundaries (e.g., scope, timeframes, geographic limits).
Use Specific and Clear Language:
- Provide precise wording and define key terms.
- Avoid vague or ambiguous language.
Provide Step-by-Step Guidance:
- Break the task into sequential steps or sub-tasks.
- Organize instructions using bullet points or numbered lists.
Specify the Desired Output Format:
- Describe how the final answer should be organized (e.g., report format, headings, bullet points, citations).
Include any specific formatting requirements.
Balance Detail with Flexibility:
- Offer sufficient detail to guide the response while allowing room for creative elaboration.
- Avoid over-constraining the prompt to enable exploration of relevant nuances.
Incorporate Iterative Refinement:
- Build in a process to test the prompt and refine it based on initial outputs.
- Allow for follow-up instructions to adjust or expand the response as needed.
Apply Proven Techniques:
- Use methods such as chain-of-thought prompting (e.g., “think step by step”) for complex tasks.
- Encourage the AI to break down problems into intermediate reasoning steps.
Set a Role or Perspective:
- Assign a specific role (e.g., “act as a market analyst” or “assume the perspective of a historian”) to tailor the tone and depth of the analysis.
Avoid Overloading the Prompt:
- Focus on one primary objective or break multiple questions into separate parts.
- Prevent overwhelming the prompt with too many distinct questions.
Request Justification and References:
- Instruct the AI to support its claims with evidence or to reference sources where possible.
- Enhance the credibility and verifiability of the response.
Review and Edit Thoroughly:
- Ensure the final prompt is clear, logically organized, and complete.
- Remove any ambiguous or redundant instructions.
"""
]
# Opening human messages used when the user supplies no custom initial
# prompts; one is chosen at random for each generated conversation.
DEFAULT_INITIAL_PROMPTS = [
    "What's been the highlight of your day?",
    "How are you feeling right now?",
    "Tell me about something that made you smile recently.",
    "What hobby brings you the most joy?",
    "Is there a book or movie you’ve enjoyed lately?",
    "What’s one goal you’re working towards this week?",
    "Can you describe a moment you felt proud of yourself?",
    "What tradition means the most to you?",
    "Have you tried anything new recently?",
    "What inspires you on tough days?",
    "Describe your ideal weekend.",
    "What's your favorite way to relax?",
    "Tell me about a challenge you overcame.",
    "What song always lifts your mood?",
    "How do you stay motivated?",
    "What’s a memory you cherish?",
    "Is there a place you long to visit?",
    "What small act of kindness did you witness today?",
    "What’s a skill you’d like to learn?",
    "How do you celebrate your achievements?",
    "What’s your favorite comfort food?",
    "Who in your life are you most grateful for?",
    "What adventure would you like to embark on?",
    "What's something you’re curious about right now?",
    "How do you handle stress?",
    "What makes you laugh out loud?",
    "Describe a time you felt truly at peace.",
    "What does success look like to you?",
    "How do you show kindness to others?",
    "What are you looking forward to this month?",
    "What’s a lesson you’ve recently learned?",
    "Is there a quote that resonates with you?",
    "What’s a tradition you’d like to start?",
    "How do you find balance in life?",
    "What’s a project you’re passionate about?",
    "What values matter most to you?",
    "Describe something that amazed you today.",
    "What’s your favorite way to express creativity?",
    "How do you unwind after a busy day?",
]
| # --- Chat Function --- | |
def chat(system, prompt, selected_model_name, seed=None, num_exchanges=5):
    """Generate one multi-turn conversation through the module-level ``client``.

    The model first answers ``prompt``. For every further exchange it is
    asked to write the human's follow-up message and then to answer it.

    Args:
        system: System prompt for the assistant.
        prompt: Initial human message.
        selected_model_name: Model identifier passed to the API.
        seed: Base seed forwarded to the API calls; a random one is drawn
            when ``None``.
        num_exchanges: Number of assistant replies to generate.

    Returns:
        A list of ``{"from": ..., "value": ...}`` dicts starting with the
        system and human entries. If any API call fails, the partial
        conversation is returned with a final ``{"from": "error"}`` entry.
    """
    if seed is None:
        seed = random.randint(0, 1000000)
    # The seed is only forwarded to the API. The global ``random`` generator
    # is deliberately NOT reseeded here: doing so made every later random
    # choice in the caller (next seed, prompt, name, model) a pure function
    # of this seed, so long batches could fall into a repeating cycle.

    conversation = [
        {"from": "system", "value": system},
        {"from": "human", "value": prompt},
    ]
    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": prompt},
    ]

    def _complete(request_messages, max_tokens, request_seed):
        """Run one chat completion and return its stripped text."""
        response = client.chat.completions.create(
            model=selected_model_name,
            messages=request_messages,
            max_tokens=max_tokens,
            temperature=1,
            seed=request_seed,
        )
        content = response.choices[0].message.content
        if content is None:
            # Surface a readable reason instead of an AttributeError on None.
            raise ValueError("the model returned a message without text content")
        return content.strip()

    def _record(source, role, text):
        """Append one turn to both the dataset view and the API history."""
        conversation.append({"from": source, "value": text})
        messages.append({"role": role, "content": text})

    try:
        # Initial AI response (uses the base seed)
        ai_response_content = _complete(messages, 150, seed)
        _record("gpt", "assistant", ai_response_content)

        # One exchange (initial human -> AI) is already done.
        for i in range(num_exchanges - 1):
            # The model writes the *human's* follow-up, seeing only the last AI reply.
            follow_up_prompt_messages = [
                {"role": "system", "content": "You are a helpful and engaging assistant. Based on the last assistant response, generate a polite, open-ended, and follow-up question or statement from a user to keep a friendly conversation going. Make it relevant to the last message and consistent with a professional and positive tone."},
                {"role": "assistant", "content": ai_response_content},
                {"role": "user", "content": "Generate friendly follow-up question/statement (max 700 words)."},
            ]
            human_follow_up_content = _complete(follow_up_prompt_messages, 70, seed + 1000 + i)
            _record("human", "user", human_follow_up_content)

            # The model answers the follow-up with the full history as context.
            ai_response_content = _complete(messages, 150, seed + 2000 + i)
            _record("gpt", "assistant", ai_response_content)
        return conversation
    except Exception as e:
        # Boundary handler: any API/parsing failure is reported in-band so the
        # caller can log it and skip this conversation.
        error_message = f"An error occurred with model {selected_model_name}: {e}"
        print(error_message)  # Print to console for debugging
        conversation.append({"from": "error", "value": error_message})
        return conversation
| # --- Hugging Face Push Function (for Dataset) --- | |
def push_file_to_huggingface_dataset(file_path, path_in_repo, commit_message_prefix):
    """Upload a local file to the configured Hugging Face dataset repository.

    Args:
        file_path: Local path of the file to upload.
        path_in_repo: Destination path inside the dataset repository.
        commit_message_prefix: Text placed before the timestamp in the
            commit message.

    Returns:
        A human-readable status string (also printed). Failures are reported
        through this string rather than raised.
    """
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        log_message = "Hugging Face token (HF_TOKEN environment variable) not found. Cannot push to Hub."
        print(log_message)
        return log_message

    # A single getsize call covers both "missing" and "empty" without a
    # separate exists/stat pair that could race with file removal.
    try:
        has_data = os.path.getsize(file_path) > 0
    except OSError:
        has_data = False
    if not has_data:
        log_message = f"No data in {file_path} to push to the dataset."
        print(log_message)
        return log_message

    try:
        # The message is labelled with a Vietnamese location, so stamp it in
        # UTC+7 instead of the server's naive local time.
        vietnam_tz = datetime.timezone(datetime.timedelta(hours=7))
        current_time_str = datetime.datetime.now(vietnam_tz).strftime('%Y-%m-%d %H:%M:%S')
        commit_message = f"{commit_message_prefix} on {current_time_str} (An Nhơn, Binh Dinh, Vietnam)"
        # The client is created inside the try so construction errors are
        # reported the same way as upload errors.
        HfApi().upload_file(
            path_or_fileobj=file_path,
            path_in_repo=path_in_repo,
            repo_id=HF_DATASET_REPO_ID,
            repo_type="dataset",
            commit_message=commit_message,
            token=hf_token,
        )
        log_message = f"Successfully pushed {path_in_repo} to dataset {HF_DATASET_REPO_ID}"
        print(log_message)
        return log_message
    except Exception as e:
        log_message = f"Error pushing {path_in_repo} to Hugging Face dataset {HF_DATASET_REPO_ID}: {e}"
        print(log_message)
        return log_message
| # --- Main Generation and Push Function --- | |
| def generate_and_display_conversations(num_conversations_input, custom_prompts_input, custom_system_prompt_input, | |
| commit_subject, commit_body, selected_model_name_input): | |
| num_conversations = int(num_conversations_input) | |
| if num_conversations <= 0: | |
| return "Please enter a number of conversations greater than zero.", "" | |
| os.makedirs(OUTPUT_DIR, exist_ok=True) | |
| # --- Load and Clean Existing Conversations --- | |
| existing_conversations = [] | |
| if os.path.exists(DATA_FILE): | |
| with open(DATA_FILE, "r") as f: | |
| for line in f: | |
| try: | |
| existing_conversations.append(json.loads(line.strip())) | |
| except json.JSONDecodeError as e: | |
| print(f"Skipping malformed JSON line in {DATA_FILE}: {line.strip()} - {e}") | |
| # Deduplicate existing conversations | |
| seen_conversations = set() | |
| cleaned_existing_conversations = [] | |
| for conv_entry in existing_conversations: | |
| # Use a string representation of the whole entry for deduplication | |
| conv_str = json.dumps(conv_entry, sort_keys=True) | |
| if conv_str not in seen_conversations: | |
| cleaned_existing_conversations.append(conv_entry) | |
| seen_conversations.add(conv_str) | |
| # Validate and filter existing conversations for completeness (expected length) | |
| expected_msg_len = lambda n_exchanges: 1 + 1 + n_exchanges + (n_exchanges - 1) # System + initial human + AI turns + human follow-ups | |
| validated_existing_conversations = [] | |
| initial_cleaned_count = len(cleaned_existing_conversations) | |
| for conv_entry in cleaned_existing_conversations: | |
| conv_list = conv_entry.get("conversations", []) | |
| # Assume num_exchanges was 5 for old conversations if not stored | |
| # Or more robustly, infer from length. | |
| # Given the fixed num_exchanges=5 for generation, we can check for this. | |
| if len(conv_list) == expected_msg_len(5): | |
| validated_existing_conversations.append(conv_entry) | |
| else: | |
| print(f"Skipping incomplete/malformed existing conversation (length {len(conv_list)} != {expected_msg_len(5)}): {conv_entry}") | |
| all_conversations = list(validated_existing_conversations) # Start with clean existing ones | |
| generation_log = [] | |
| current_time_loc = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + " (An Nhơn, Binh Dinh, Vietnam)" | |
| generation_log.append(f"Starting conversation generation at {current_time_loc}") | |
| generation_log.append(f"Loaded and cleaned {len(validated_existing_conversations)} existing conversations (initially {initial_cleaned_count} before validation).") | |
| generation_log.append(f"Generating {num_conversations} *new* conversations.") | |
| model_names_to_use = list(AVAILABLE_MODELS.keys()) | |
| if selected_model_name_input and selected_model_name_input in model_names_to_use: | |
| # If a specific model is selected, all conversations in this batch will use that model | |
| model_selection_info = f"Specific model selected for all new conversations: '{selected_model_name_input}'" | |
| # Determine the model to use for this specific conversation | |
| selected_model_for_this_conv_batch = selected_model_name_input | |
| else: | |
| # If no specific model or invalid model, random models will be picked per conversation | |
| model_selection_info = f"No specific model selected or invalid. Models will be chosen randomly per conversation from: {', '.join(model_names_to_use)}" | |
| selected_model_for_this_conv_batch = None # Indicate random selection per loop | |
| generation_log.append(model_selection_info) | |
| current_prompts = DEFAULT_INITIAL_PROMPTS | |
| if custom_prompts_input: | |
| parsed_custom_prompts = [p.strip() for p in custom_prompts_input.split(',') if p.strip()] | |
| if parsed_custom_prompts: | |
| current_prompts = parsed_custom_prompts | |
| new_conversations_generated = [] | |
| expected_conversation_length = expected_msg_len(5) # Always 5 exchanges for new generations | |
| for i in tqdm(range(num_conversations), desc="Generating conversations"): | |
| seed = random.randint(0, 1000000) | |
| if custom_system_prompt_input: | |
| system = custom_system_prompt_input.strip() | |
| else: | |
| system = random.choice(role_play_prompts) | |
| random_name = random.choice(DIVERSE_NAMES) | |
| prompt_template = random.choice(current_prompts) | |
| prompt = prompt_template.replace("[NAME]", random_name) | |
| # Determine the model to use for this specific conversation | |
| selected_model_for_this_conv = selected_model_for_this_conv_batch if selected_model_for_this_conv_batch else random.choice(model_names_to_use) | |
| generation_log.append(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] Generating conv {i+1}/{num_conversations} with '{selected_model_for_this_conv}' (System: '{system[:50]}...')") | |
| conversation = chat(system, prompt, selected_model_for_this_conv, seed=seed, num_exchanges=5) | |
| if len(conversation) == expected_conversation_length and not any(d.get("from") == "error" for d in conversation): | |
| new_conv_entry = {"model_used": selected_model_for_this_conv, "conversations": conversation} | |
| # Add to all_conversations and new_conversations_generated only if not a duplicate of what's already *in memory* | |
| # This handles duplicates from current batch or newly generated identical to existing | |
| new_conv_str = json.dumps(new_conv_entry, sort_keys=True) | |
| if new_conv_str not in seen_conversations: | |
| all_conversations.append(new_conv_entry) | |
| new_conversations_generated.append(new_conv_entry) | |
| seen_conversations.add(new_conv_str) # Mark as seen | |
| generation_log.append(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] Successfully generated and added conv {i+1}/{num_conversations}.") | |
| else: | |
| generation_log.append(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] Skipped conv {i+1}/{num_conversations} as it's a duplicate.") | |
| else: | |
| generation_log.append(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] Skipping conv {i+1}/{num_conversations} due to error or incorrect length ({len(conversation)} messages, expected {expected_conversation_length}).") | |
| if conversation and conversation[-1].get("from") == "error": | |
| generation_log.append(f" Error details: {conversation[-1]['value']}") | |
| # Save all (cleaned existing + newly generated unique) conversations to JSONL | |
| with open(DATA_FILE, "w") as f: | |
| for conv in all_conversations: | |
| f.write(json.dumps(conv) + "\n") | |
| generation_log.append(f"Saved {len(new_conversations_generated)} *new unique* conversations to {DATA_FILE} (total unique and validated: {len(all_conversations)}).") | |
| generation_log.append("Attempting to push main conversations file to Hugging Face Dataset...") | |
| # --- Auto-push main conversations to Hugging Face Dataset --- | |
| # Use the custom commit message | |
| commit_message = f"{commit_subject.strip()}\n\n{commit_body.strip()}" if commit_body.strip() else commit_subject.strip() | |
| push_status = push_file_to_huggingface_dataset(DATA_FILE, f"data/conversations_{current_datetime_vietnam}.jsonl", commit_message) | |
| generation_log.append(push_status) | |
| generation_log.append(f"Process complete at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} (An Nhơn, Binh Dinh, Vietnam)") | |
| return json.dumps(all_conversations, indent=2), "\n".join(generation_log) | |
| # --- Community Prompts Functions --- | |
def load_community_prompts():
    """Return every parseable entry from the community prompts JSONL file.

    A missing file yields an empty list; lines that are not valid JSON are
    silently ignored.
    """
    if not os.path.exists(COMMUNITY_PROMPTS_FILE):
        return []

    loaded_entries = []
    with open(COMMUNITY_PROMPTS_FILE, "r") as handle:
        for raw_line in handle:
            try:
                entry = json.loads(raw_line.strip())
            except json.JSONDecodeError:
                # Malformed line: leave it out and keep reading.
                continue
            loaded_entries.append(entry)
    return loaded_entries
def save_community_prompt(system_prompt, initial_prompt):
    """Add a prompt pair to the community list and push the list to the Hub.

    Args:
        system_prompt: System prompt text submitted by the user.
        initial_prompt: Initial human prompt text submitted by the user.

    Returns:
        ``(status_text, prompts_json)``: a newline-joined status log and the
        current (deduplicated) community prompts as indented JSON.
    """
    system_text = (system_prompt or "").strip()
    initial_text = (initial_prompt or "").strip()

    # Deduplicate what is already stored. Each entry carries a timestamp, so
    # "same prompt pair" has to be tracked separately from "same entry".
    seen_entries = set()
    seen_pairs = set()
    cleaned_existing_prompts = []
    for p in load_community_prompts():
        p_str = json.dumps(p, sort_keys=True)
        if p_str in seen_entries:
            continue
        seen_entries.add(p_str)
        cleaned_existing_prompts.append(p)
        if isinstance(p, dict):
            seen_pairs.add((p.get("system_prompt"), p.get("initial_prompt")))

    log_message = []
    if not system_text or not initial_text:
        log_message.append("System prompt and Initial prompt cannot be empty.")
    elif (system_text, initial_text) in seen_pairs:
        log_message.append("This exact prompt pair already exists in the community list.")
    else:
        # Timezone-aware stamp (UTC+7, matching the module's other
        # timestamps); a naive datetime renders %Z/%z as empty strings.
        vietnam_tz = datetime.timezone(datetime.timedelta(hours=7))
        cleaned_existing_prompts.append({
            "system_prompt": system_text,
            "initial_prompt": initial_text,
            "timestamp": datetime.datetime.now(vietnam_tz).strftime('%Y-%m-%d %H:%M:%S %z'),
        })
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        with open(COMMUNITY_PROMPTS_FILE, "w", encoding="utf-8") as f:
            f.writelines(json.dumps(p) + "\n" for p in cleaned_existing_prompts)
        log_message.append("Prompt submitted successfully!")
        # Push only when the file actually changed.
        push_status = push_file_to_huggingface_dataset(
            COMMUNITY_PROMPTS_FILE,
            HF_COMMUNITY_PROMPT_FILE_IN_REPO,
            "Update community_prompts.jsonl from Gradio app",
        )
        log_message.append(push_status)
    return "\n".join(log_message), json.dumps(cleaned_existing_prompts, indent=2)
| # Function to refresh community prompts display | |
def refresh_community_prompts_display():
    """Return the stored community prompts as an indented JSON string."""
    return json.dumps(load_community_prompts(), indent=2)
| # --- Commit Templates Functions --- | |
def load_commit_templates():
    """Return the commit message templates, creating the defaults on first use.

    When the templates file exists it is parsed and returned (an empty list
    if it is not valid JSON). Otherwise the default templates are written to
    the file and returned.
    """
    if os.path.exists(COMMIT_TEMPLATES_FILE):
        with open(COMMIT_TEMPLATES_FILE, "r") as template_file:
            try:
                return json.load(template_file)
            except json.JSONDecodeError:
                return []  # Malformed file: behave as if no templates exist

    # No file yet: seed it with the built-in templates.
    builtin_rows = (
        ("feat: New Feature", "feat: ", ""),
        ("fix: Bug Fix", "fix: ", "Fixes #[issue_number]"),
        ("docs: Documentation", "docs: ", ""),
        ("chore: Maintenance", "chore: ", ""),
        ("style: Formatting", "style: ", ""),
        ("refactor: Code Refactor", "refactor: ", ""),
        ("perf: Performance Improvement", "perf: ", ""),
        ("test: Test Update", "test: ", ""),
        ("Custom Empty", "", ""),
    )
    default_templates = [
        {"name": label, "subject": subject, "body": body}
        for label, subject, body in builtin_rows
    ]
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    with open(COMMIT_TEMPLATES_FILE, "w") as template_file:
        json.dump(default_templates, template_file, indent=2)
    return default_templates
def get_template_choices():
    """Return the names of all stored commit templates, in file order."""
    names = []
    for template in load_commit_templates():
        names.append(template["name"])
    return names
def update_commit_fields(selected_template_name):
    """Return ``(subject, body)`` of the named template, or two empty strings."""
    matching = next(
        (t for t in load_commit_templates() if t["name"] == selected_template_name),
        None,
    )
    if matching is None:
        return "", ""  # Fallback if not found
    return matching["subject"], matching["body"]
def save_custom_commit_template(template_name, subject, body):
    """Create or overwrite a commit message template and persist the list.

    Args:
        template_name: Name of the template; an existing name is updated.
        subject: Commit subject text for the template.
        body: Commit body text for the template.

    Returns:
        ``(status_text, dropdown_update, json_update)`` for the status box,
        the template dropdown and the JSON view.
    """
    templates = load_commit_templates()
    name = (template_name or "").strip()
    subject_text = (subject or "").strip()
    body_text = (body or "").strip()

    def _ui_updates():
        # gr.update() is the version-independent way to patch component
        # properties; the per-class Component.update() helpers were removed
        # in Gradio 4.
        return (
            gr.update(choices=[t["name"] for t in templates]),
            gr.update(value=templates),
        )

    if not name:
        return ("Template name cannot be empty!", *_ui_updates())

    # Overwrite a template with the same name, otherwise append a new one.
    for template in templates:
        if template["name"] == name:
            template["subject"] = subject_text
            template["body"] = body_text
            break
    else:
        templates.append({"name": name, "subject": subject_text, "body": body_text})

    with open(COMMIT_TEMPLATES_FILE, "w") as f:
        json.dump(templates, f, indent=2)
    return (f"Template '{name}' saved successfully!", *_ui_updates())
def refresh_commit_display():
    """Reload the commit templates for the UI.

    Returns:
        ``(dropdown_update, templates_json)``: an update carrying the current
        template names as choices, and the templates as indented JSON.
    """
    # Read the file once and derive both outputs from the same snapshot.
    templates = load_commit_templates()
    # gr.update() replaces Dropdown.update(), which was removed in Gradio 4.
    return gr.update(choices=[t["name"] for t in templates]), json.dumps(templates, indent=2)
| # Gradio Interface setup | |
| with gr.Blocks() as demo: | |
| gr.Markdown("# DeepRethink") | |
| gr.Markdown( | |
| "Generate engaging, thinking, and positive conversations with various Pollinations.ai models. " | |
| f"Generated data is saved and pushed to the Hugging Face dataset `{HF_DATASET_REPO_ID}`." | |
| ) | |
| with gr.Tabs(): | |
| with gr.Tab("Generate Conversations"): | |
| with gr.Row(): | |
| num_conversations_input = gr.Slider(minimum=1, maximum=2000, value=3, step=1, label="Number of Conversations to Generate", info="More conversations take longer and might hit API limits.") | |
| gr.Markdown("### Model Selection") | |
| model_selector_dropdown = gr.Dropdown( | |
| label="Select Model (or leave empty for random)", | |
| choices=list(AVAILABLE_MODELS.keys()), # The actual values passed will be model names | |
| value=None, # Default to no selection, implying random | |
| interactive=True, | |
| info="Choose a specific model or let the app pick one randomly for each conversation in the batch." | |
| ) | |
| # Add a Textbox for model description based on selection | |
| model_description_output = gr.Textbox( | |
| label="Selected Model Info", | |
| interactive=False, | |
| lines=2 | |
| ) | |
| def get_model_info(model_name): | |
| if model_name and model_name in AVAILABLE_MODELS: | |
| info = AVAILABLE_MODELS[model_name] | |
| return f"Description: {info['description']}\nSpeed: {info['speed']}" | |
| return "No specific model selected. Conversations will use randomly chosen models from the available list." | |
| model_selector_dropdown.change( | |
| fn=get_model_info, | |
| inputs=model_selector_dropdown, | |
| outputs=model_description_output | |
| ) | |
| custom_system_prompt_input = gr.Textbox( | |
| label="Custom System Prompt (optional)", | |
| placeholder="e.g., You are a helpful and kind AI assistant.", | |
| info="Define the AI's role or personality. If left empty, a random cute role-play prompt will be used.", | |
| lines=3 | |
| ) | |
| custom_prompts_input = gr.Textbox( | |
| label="Custom Initial Prompts (optional)", | |
| placeholder="e.g., What's your favorite color?, Tell me a joke, What makes you happy?", | |
| info="Enter multiple prompts separated by commas. If left empty, default prompts will be used. Make sure to include '[NAME]' if you want a name inserted.", | |
| lines=3 | |
| ) | |
| gr.Markdown("### Hugging Face Commit Message") | |
| with gr.Row(): | |
| commit_template_dropdown = gr.Dropdown( | |
| label="Select Commit Message Template", | |
| choices=get_template_choices(), | |
| value=get_template_choices()[0] if get_template_choices() else None, | |
| interactive=True | |
| ) | |
| refresh_commit_templates_button = gr.Button("Refresh Templates") | |
| commit_subject_input = gr.Textbox( | |
| label="Commit Subject (max 50 chars)", | |
| placeholder="e.g., feat: Add conversation generation feature", | |
| lines=1, | |
| max_lines=1 | |
| ) | |
| commit_body_input = gr.Textbox( | |
| label="Commit Body (optional)", | |
| placeholder="Detailed description of changes. Use imperative mood.", | |
| lines=5 | |
| ) | |
| generate_button = gr.Button("Generate & Push Conversations") | |
| output_conversations = gr.JSON(label="Generated Conversations (Content of conversations.jsonl)") | |
| output_log = gr.Textbox(label="Process Log", interactive=False, lines=10, max_lines=20) | |
| # Link commit template dropdown to update fields | |
| commit_template_dropdown.change( | |
| fn=update_commit_fields, | |
| inputs=commit_template_dropdown, | |
| outputs=[commit_subject_input, commit_body_input] | |
| ) | |
| # Initial load of commit fields based on default/first template | |
| demo.load( | |
| fn=lambda: update_commit_fields(get_template_choices()[0] if get_template_choices() else None), | |
| inputs=None, | |
| outputs=[commit_subject_input, commit_body_input] | |
| ) | |
| generate_button.click( | |
| fn=generate_and_display_conversations, | |
| inputs=[ | |
| num_conversations_input, | |
| custom_prompts_input, | |
| custom_system_prompt_input, | |
| commit_subject_input, # Pass commit subject | |
| commit_body_input, # Pass commit body | |
| model_selector_dropdown # Pass selected model name | |
| ], | |
| outputs=[output_conversations, output_log], | |
| show_progress=True | |
| ) | |
with gr.Tab("Community Prompts"):
    # --- Submission form -------------------------------------------------
    gr.Markdown("## Share Your Favorite Prompts with the Community!")
    gr.Markdown(
        "Submit cute and engaging system prompts and initial prompts here. "
        "These will be added to a shared list for others to see and use."
    )
    community_system_prompt_input = gr.Textbox(
        interactive=True,
        lines=3,
        label="Your System Prompt",
        placeholder="e.g., You are a tiny, cheerful squirrel, Squeaky, who loves nuts and collecting shiny things.",
    )
    community_initial_prompt_input = gr.Textbox(
        interactive=True,
        lines=2,
        label="Your Initial Prompt (Use [NAME] for dynamic naming)",
        placeholder="e.g., Hey [NAME], what's your favorite type of acorn?",
    )
    submit_community_prompt_button = gr.Button("Submit Prompt to Community")
    community_submit_status = gr.Textbox(label="Submission Status", interactive=False)

    # --- Listing of what has been submitted so far -----------------------
    gr.Markdown("---")
    gr.Markdown("## Current Community Prompts")
    refresh_community_prompts_button = gr.Button("Refresh Community Prompts")
    community_prompts_display = gr.JSON(label="Submitted Community Prompts")

    # Submitting writes to the status box and to the JSON listing.
    submit_community_prompt_button.click(
        fn=save_community_prompt,
        inputs=[community_system_prompt_input, community_initial_prompt_input],
        outputs=[community_submit_status, community_prompts_display],
        show_progress=True,
    )

    # The listing is populated on page load and again whenever the refresh
    # button is pressed; both use the same callback and the same output.
    demo.load(
        fn=refresh_community_prompts_display,
        inputs=None,
        outputs=community_prompts_display,
    )
    refresh_community_prompts_button.click(
        fn=refresh_community_prompts_display,
        inputs=None,
        outputs=community_prompts_display,
    )
with gr.Tab("Manage Commit Templates"):  # Tab for editing commit templates
    gr.Markdown("## Manage Your Local Git Commit Message Templates")
    gr.Markdown(
        "Select an existing template to edit, or enter a new name to create a new one. "
        "These templates are saved locally in `generated/commits.json`."
    )

    # Snapshot the template names once at UI-build time instead of calling
    # get_template_choices() three times for a single dropdown.
    _manage_tab_templates = get_template_choices()
    commit_template_edit_dropdown = gr.Dropdown(
        label="Select Template to Edit/View",
        choices=_manage_tab_templates,
        value=_manage_tab_templates[0] if _manage_tab_templates else None,
        interactive=True,
    )
    commit_template_name_input = gr.Textbox(
        label="Template Name (for saving new or editing existing)",
        placeholder="e.g., feat: Add New Feature Template",
    )
    commit_template_subject_input = gr.Textbox(
        label="Template Subject Line",
        placeholder="e.g., feat: ",
    )
    commit_template_body_input = gr.Textbox(
        label="Template Body (optional)",
        placeholder="e.g., - Detailed description of the feature\n- Related issue: #XYZ",
        lines=5,
    )
    save_template_button = gr.Button("Save/Update Template")
    template_status_output = gr.Textbox(label="Template Save Status", interactive=False)
    all_templates_display = gr.JSON(label="All Current Commit Templates")

    def _template_edit_fields(name):
        """Return (name, subject, body) for the template selected in the dropdown.

        update_commit_fields() is called once and its first two items are
        reused, rather than being invoked separately for subject and body.
        """
        fields = update_commit_fields(name)
        return name, fields[0], fields[1]

    def _initial_template_tab_state():
        """Return the five initial values for this tab's page-load outputs.

        Order matches the ``outputs`` list of the ``demo.load`` call below:
        dropdown value, name input, subject, body, JSON text of all templates.
        Each lookup is performed exactly once so the five values are derived
        from the same read of the template list.
        """
        choices = get_template_choices()
        first_name = choices[0] if choices else None
        fields = update_commit_fields(first_name)
        return (
            first_name,  # initial dropdown value
            first_name,  # initial name input
            fields[0],  # initial subject
            fields[1],  # initial body
            json.dumps(load_commit_templates(), indent=2),  # initial JSON display
        )

    # Selecting a template populates the three edit fields.
    commit_template_edit_dropdown.change(
        fn=_template_edit_fields,
        inputs=commit_template_edit_dropdown,
        outputs=[commit_template_name_input, commit_template_subject_input, commit_template_body_input],
    )

    # Saving reports a status and updates this tab's dropdown and JSON display.
    save_template_button.click(
        fn=save_custom_commit_template,
        inputs=[commit_template_name_input, commit_template_subject_input, commit_template_body_input],
        outputs=[template_status_output, commit_template_edit_dropdown, all_templates_display],
    )

    # Initial load of the template management tab.
    demo.load(
        fn=_initial_template_tab_state,
        inputs=None,
        outputs=[
            commit_template_edit_dropdown,
            commit_template_name_input,
            commit_template_subject_input,
            commit_template_body_input,
            all_templates_display,
        ],
    )
# Wire the "Refresh Templates" button from the 'Generate Conversations' tab.
# Its outputs are that tab's template dropdown and the JSON listing of all
# templates; the edit dropdown in the management tab is not among the outputs.
refresh_commit_templates_button.click(
    refresh_commit_display,
    inputs=None,
    outputs=[commit_template_dropdown, all_templates_display],
)

# Footer: horizontal rule followed by a note about where data is pushed.
gr.Markdown("---")
gr.Markdown(
    "**Note on Push to Hub:** This Space is configured to automatically push generated data and "
    "community prompts to the Hugging Face dataset "
    f"`{HF_DATASET_REPO_ID}` using a Hugging Face token securely stored as a Space Secret (`HF_TOKEN`). "
    "User tokens are not required."
)
# Disabled server-time footer, kept for reference:
# gr.Markdown(f"Current server time: {current_datetime_vietnam} (An Nhơn, Binh Dinh, Vietnam)")
def _launch_app():
    """Prepare the local output directory and start the Gradio server.

    Steps, in order: create OUTPUT_DIR if missing, call
    load_commit_templates() (per the original comment, this creates the
    templates file with defaults when it does not exist), then launch the
    app with debug output enabled and public sharing disabled.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    load_commit_templates()
    demo.launch(debug=True, share=False)


# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    _launch_app()