Spaces:

ReallyFloppyPenguin
/

SynthGen

Sleeping

App Files Files Community

ReallyFloppyPenguin committed on Apr 5, 2025

Commit

230e3e0

verified ·

1 Parent(s): bffeb3e

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -79

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import json
 import tempfile
 import os
 import re # For parsing conversation
-from typing import Union, Optional # Add Optional
 # Import the actual functions from synthgen
 from synthgen import (
     generate_synthetic_text,
@@ -154,25 +154,39 @@ def generate_prompts_ui(
 # --- Modified Generation Wrappers ---
-# Wrapper for text generation + JSON preparation
 def run_generation_and_prepare_json(
     prompt: str,
     model: str,
     num_samples: int,
-    temperature: float, # Add settings
     top_p: float,
     max_tokens: int
-):
     """Generates text samples and prepares a JSON file for download."""
-    # Handle optional settings (Gradio might pass default if not interacted with)
-    temp_val = temperature if temperature > 0 else None # Allow 0 but treat as None if needed? OpenRouter usually uses >0. Let's map 0 to None.
-    top_p_val = top_p if 0 < top_p <= 1 else None # top_p must be > 0 and <= 1
-    max_tokens_val = max_tokens if max_tokens > 0 else None # Max tokens should be positive
     if not prompt:
-        return "Error: Please enter a prompt.", None
     if num_samples <= 0:
-        return "Error: Number of samples must be positive.", None
     output_str = f"Generating {num_samples} samples using model '{model}'...\n"
     output_str += f"(Settings: Temp={temp_val}, Top-P={top_p_val}, MaxTokens={max_tokens_val})\n"
@@ -180,48 +194,58 @@ def run_generation_and_prepare_json(
     results_list = []
     for i in range(num_samples):
-        # Pass settings to the backend function
         generated_text = generate_synthetic_text(
-            prompt,
-            model,
-            temperature=temp_val,
-            top_p=top_p_val,
-            max_tokens=max_tokens_val
         )
         output_str += f"--- Sample {i+1} ---\n"
         output_str += generated_text + "\n\n"
         if not generated_text.startswith("Error:"):
             results_list.append(generated_text)
-        else:
-            pass
     output_str += "="*20 + "\nGeneration complete (check results above for errors)."
     json_filepath = create_json_file(results_list, "text_samples.json")
-    return output_str, json_filepath
-# Wrapper for conversation generation + JSON preparation
 def run_conversation_generation_and_prepare_json(
     system_prompts_text: str,
     model: str,
     num_turns: int,
-    temperature: float, # Add settings
     top_p: float,
     max_tokens: int
-):
     """Generates conversations and prepares a JSON file for download."""
     temp_val = temperature if temperature > 0 else None
     top_p_val = top_p if 0 < top_p <= 1 else None
     max_tokens_val = max_tokens if max_tokens > 0 else None
     if not system_prompts_text:
-        return "Error: Please enter or generate at least one system prompt/topic.", None
     if num_turns <= 0:
-        return "Error: Number of turns must be positive.", None
     prompts = [p.strip() for p in system_prompts_text.strip().split('\n') if p.strip()]
     if not prompts:
-        return "Error: No valid prompts found in the input.", None
     output_str = f"Generating {len(prompts)} conversations ({num_turns} turns each) using model '{model}'...\n"
     output_str += f"(Settings: Temp={temp_val}, Top-P={top_p_val}, MaxTokens={max_tokens_val})\n"
@@ -229,70 +253,33 @@ def run_conversation_generation_and_prepare_json(
     results_list_structured = []
     for i, prompt in enumerate(prompts):
-        # Pass settings to the backend function
         conversation_text = generate_synthetic_conversation(
-            prompt,
-            model,
-            num_turns,
-            temperature=temp_val,
-            top_p=top_p_val,
-            max_tokens=max_tokens_val
         )
         output_str += f"--- Conversation {i+1}/{len(prompts)} ---\n"
         output_str += conversation_text + "\n\n"
-        # Parse the generated text block for JSON structure
-        # Note: generate_synthetic_conversation includes a title like "Generated conversation for..."
-        # We might want to remove that before parsing or adjust the parser.
-        # Let's assume the core conversation starts after the first line break if a title exists.
         core_conversation_text = conversation_text
-        if "\n\n" in conversation_text:
-             # Split only if the separator is present and the text doesn't start with Error:
-             if not conversation_text.startswith("Error:"):
-                 parts = conversation_text.split("\n\n", 1)
-                 if len(parts) > 1:
-                     core_conversation_text = parts[1]
-                 else: # Handle case where title might not have double newline
-                     core_conversation_text = conversation_text # Fallback to full text
-             else:
-                 core_conversation_text = None # Don't try to parse errors
-        elif conversation_text.startswith("Error:"):
-            core_conversation_text = None # Don't try to parse errors
-        # Else: No double newline, assume the whole text is the conversation (or error)
         if core_conversation_text:
             messages = parse_conversation_string(core_conversation_text)
-            if messages: # Add only if parsing was successful
-                 results_list_structured.append({
-                     "prompt": prompt,
-                     "messages": messages
-                 })
-            else: # Parsing failed, optionally add raw text or error placeholder
-                 results_list_structured.append({
-                     "prompt": prompt,
-                     "error": "Failed to parse conversation structure.",
-                     "raw_text": core_conversation_text # Include raw text if parsing failed
-                 })
-        elif conversation_text.startswith("Error:"):
-             results_list_structured.append({
-                 "prompt": prompt,
-                 "error": conversation_text # Include the error message from generation
-             })
-        else: # Handle case where core_conversation_text became None unexpectedly or original text was just a title
-             results_list_structured.append({
-                "prompt": prompt,
-                "error": "Could not extract conversation content for parsing.",
-                "raw_text": conversation_text
-             })
     output_str += "="*40 + "\nGeneration complete (check results above for errors)."
-    # Create JSON file from the structured list
     json_filepath = create_json_file(results_list_structured, "conversations.json")
-    return output_str, json_filepath
 # --- Gradio Interface Definition ---
@@ -419,4 +406,4 @@ if __name__ == "__main__":
     print("Launching Gradio App...")
     print("Make sure the OPENROUTER_API_KEY environment variable is set.")
     # Use share=True for temporary public link if running locally and need to test
-    demo.launch(share=True) # share=True

 import tempfile
 import os
 import re # For parsing conversation
+from typing import Union, Optional, Dict # Import Dict
 # Import the actual functions from synthgen
 from synthgen import (
     generate_synthetic_text,
 # --- Modified Generation Wrappers ---
+# Wrapper for text generation + JSON preparation - RETURNS DICT
 def run_generation_and_prepare_json(
     prompt: str,
     model: str,
     num_samples: int,
+    temperature: float,
     top_p: float,
     max_tokens: int
+) -> Dict[gr.Textbox, str]: # NOTE(review): hint looks inaccurate - the returned dicts map components to gr.update(...) results, not str
     """Generates text samples and prepares a JSON file for download."""
+    # Map out-of-range settings to None: temperature <= 0, top_p outside (0, 1], max_tokens <= 0
+    temp_val = temperature if temperature > 0 else None
+    top_p_val = top_p if 0 < top_p <= 1 else None
+    max_tokens_val = max_tokens if max_tokens > 0 else None
+    # The dicts returned below are keyed by the component objects
+    # output_text and download_file_text, which are not defined in this
+    # function; Python resolves those names when the function is called,
+    # so they must exist in an enclosing/module scope by then.
+    # NOTE(review): they presumably must also be listed in the event's
+    # outputs for Gradio to accept dict returns - confirm at the call site.
     if not prompt:
+        # Return updates for both outputs
+        return {
+            output_text: gr.update(value="Error: Please enter a prompt."),
+            download_file_text: gr.update(value=None) # Clear file output
+        }
     if num_samples <= 0:
+         return {
+            output_text: gr.update(value="Error: Number of samples must be positive."),
+            download_file_text: gr.update(value=None)
+        }
     output_str = f"Generating {num_samples} samples using model '{model}'...\n"
     output_str += f"(Settings: Temp={temp_val}, Top-P={top_p_val}, MaxTokens={max_tokens_val})\n"
     results_list = []
     for i in range(num_samples):
         generated_text = generate_synthetic_text(
+            prompt, model, temperature=temp_val, top_p=top_p_val, max_tokens=max_tokens_val
         )
         output_str += f"--- Sample {i+1} ---\n"
         output_str += generated_text + "\n\n"
         if not generated_text.startswith("Error:"):
             results_list.append(generated_text)
     output_str += "="*20 + "\nGeneration complete (check results above for errors)."
     json_filepath = create_json_file(results_list, "text_samples.json")
+    # Log text goes to output_text; the path returned by create_json_file goes to download_file_text
+    return {
+        output_text: gr.update(value=output_str),
+        download_file_text: gr.update(value=json_filepath) # Update file path
+    }
+# Wrapper for conversation generation + JSON preparation - RETURNS DICT
 def run_conversation_generation_and_prepare_json(
     system_prompts_text: str,
     model: str,
     num_turns: int,
+    temperature: float,
     top_p: float,
     max_tokens: int
+) -> Dict[gr.Textbox, str]: # NOTE(review): hint looks inaccurate - the returned dicts map components to gr.update(...) results, not str
     """Generates conversations and prepares a JSON file for download."""
     temp_val = temperature if temperature > 0 else None
     top_p_val = top_p if 0 < top_p <= 1 else None
     max_tokens_val = max_tokens if max_tokens > 0 else None
+    # Returned dicts are keyed by output_conv / download_file_conv, names resolved at call time
+    # (not defined in this function); each value is a gr.update(...) for that component.
     if not system_prompts_text:
+        return {
+            output_conv: gr.update(value="Error: Please enter or generate at least one system prompt/topic."),
+            download_file_conv: gr.update(value=None)
+        }
     if num_turns <= 0:
+         return {
+            output_conv: gr.update(value="Error: Number of turns must be positive."),
+            download_file_conv: gr.update(value=None)
+        }
     prompts = [p.strip() for p in system_prompts_text.strip().split('\n') if p.strip()]
     if not prompts:
+        return {
+            output_conv: gr.update(value="Error: No valid prompts found in the input."),
+            download_file_conv: gr.update(value=None)
+        }
     output_str = f"Generating {len(prompts)} conversations ({num_turns} turns each) using model '{model}'...\n"
     output_str += f"(Settings: Temp={temp_val}, Top-P={top_p_val}, MaxTokens={max_tokens_val})\n"
     results_list_structured = []
     for i, prompt in enumerate(prompts):
         conversation_text = generate_synthetic_conversation(
+            prompt, model, num_turns, temperature=temp_val, top_p=top_p_val, max_tokens=max_tokens_val
         )
         output_str += f"--- Conversation {i+1}/{len(prompts)} ---\n"
         output_str += conversation_text + "\n\n"
+        # --- Parsing Logic: skip "Error:" results; otherwise parse the text after the first "\n\n" (or all of it if there is none) ---
         core_conversation_text = conversation_text
+        if conversation_text.startswith("Error:"): core_conversation_text = None
+        elif "\n\n" in conversation_text:
+             parts = conversation_text.split("\n\n", 1)
+             core_conversation_text = parts[1] if len(parts) > 1 else conversation_text
         if core_conversation_text:
             messages = parse_conversation_string(core_conversation_text)
+            if messages: results_list_structured.append({"prompt": prompt, "messages": messages})
+            else: results_list_structured.append({"prompt": prompt, "error": "Failed to parse structure.", "raw_text": core_conversation_text})
+        elif conversation_text.startswith("Error:"): results_list_structured.append({"prompt": prompt, "error": conversation_text})
+        else: results_list_structured.append({"prompt": prompt, "error": "Could not extract content.", "raw_text": conversation_text})
+        # --- End Parsing Logic: every prompt has appended exactly one record (messages or error) ---
     output_str += "="*40 + "\nGeneration complete (check results above for errors)."
     json_filepath = create_json_file(results_list_structured, "conversations.json")
+    # Log text goes to output_conv; the path returned by create_json_file goes to download_file_conv
+    return {
+        output_conv: gr.update(value=output_str),
+        download_file_conv: gr.update(value=json_filepath)
+    }
 # --- Gradio Interface Definition ---
     print("Launching Gradio App...")
     print("Make sure the OPENROUTER_API_KEY environment variable is set.")
     # Use share=True for temporary public link if running locally and need to test
+    demo.launch() # share=True