feat: Simplified UI - Pure prompt evaluation like Claude Code
MAJOR CHANGES:
- Removed all UI controls (language/framework fields, temperature and context-window sliders)
- Pure prompt evaluation: AI decides everything from instructions
- Increased max_tokens: Claude 200K, GPT-4o 16K, Groq 32K, Gemini 65K
- Updated SYSTEM_PROMPT to emphasize instruction-following
- Temperature fixed at 0.7 (balanced)
UX PHILOSOPHY:
- Like Claude Code: user writes detailed instructions
- AI interprets and decides language, framework, architecture
- Tests model's ability to read requirements and contract
- No hand-holding - evaluate pure AI capability
EXAMPLES UPDATED:
- Now include language/framework IN the prompt text (see the before/after sketch below)
- Example: 'Create REST API in Rust using Axum...'
- NOT: Separate dropdown for 'Rust' + 'Axum'
Context Window: 200,000 tokens output (Claude Sonnet 4.5)
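
In code terms, the whole change reduces to dropping the prompt-enhancement step. A condensed before/after sketch, reconstructed from the diff below (all names come from the diff; nothing here is new code):

# Before: UI fields were spliced into the user's prompt.
enhanced_prompt = f"Generate {language} code using {framework} for the following project:\n\n{prompt}"
enhanced_prompt += f"\n\nNote: Keep response within {context_window} tokens."
result = generate_code_with_model(enhanced_prompt, model, temperature)

# After: the prompt passes through untouched; the model infers language,
# framework, and architecture from the instructions alone.
result = generate_code_with_model(prompt, model, temperature=0.7)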
@@ -80,20 +80,23 @@ SYSTEM_PROMPT = """You are Ectus-R, an expert autonomous software engineer power
 Your task is to generate production-ready code based on user requirements.
 
 REQUIREMENTS:
-1.
-2.
-3.
-4.
-5.
-6.
+1. Read the user's instructions carefully and decide language, framework, and architecture accordingly
+2. Write clean, idiomatic code following best practices
+3. Include comprehensive error handling
+4. Add inline comments explaining complex logic
+5. Generate unit tests when appropriate
+6. Create deployment configuration (Dockerfile) when needed
+7. Use modern language features and libraries
 
 OUTPUT FORMAT:
-1. Main source code
-2. Unit tests
-3. Dockerfile
+1. Main source code with complete implementation
+2. Unit tests (if requested or beneficial)
+3. Dockerfile (if deployment mentioned)
 4. Brief README with usage instructions
 
-
+Context window: 200,000 tokens output - you can generate comprehensive solutions.
+
+Be complete and thorough. Focus on quality and production-readiness."""
 
 def generate_code_with_model(prompt: str, model_name: str, temperature: float = 0.7):
     """Generate code using specified model"""
@@ -119,7 +122,7 @@ def generate_code_with_model(prompt: str, model_name: str, temperature: float =
         client = anthropic.Anthropic(api_key=os.getenv(config["api_key_env"]))
         response = client.messages.create(
             model=config["model"],
-            max_tokens=
+            max_tokens=200000,
             temperature=temperature,
             system=SYSTEM_PROMPT,
             messages=[{"role": "user", "content": prompt}]
@@ -140,7 +143,7 @@ def generate_code_with_model(prompt: str, model_name: str, temperature: float =
                 {"role": "user", "content": prompt}
             ],
             temperature=temperature,
-            max_tokens=
+            max_tokens=16000  # GPT-4o max is 16K
         )
         generated_code = response.choices[0].message.content
         input_tokens = response.usage.prompt_tokens
@@ -155,7 +158,7 @@ def generate_code_with_model(prompt: str, model_name: str, temperature: float =
                 {"role": "user", "content": prompt}
             ],
             temperature=temperature,
-            max_tokens=
+            max_tokens=32000  # Groq supports up to 32K
         )
         generated_code = response.choices[0].message.content
         input_tokens = response.usage.prompt_tokens
@@ -167,7 +170,7 @@ def generate_code_with_model(prompt: str, model_name: str, temperature: float =
         model = genai.GenerativeModel(config["model"])
         response = model.generate_content(
             f"{SYSTEM_PROMPT}\n\nUser request: {prompt}",
-            generation_config={"temperature": temperature, "max_output_tokens":
+            generation_config={"temperature": temperature, "max_output_tokens": 65536}  # 65536 is the SDK maximum; Gemini 2.0 Flash itself caps output at 8,192 tokens
         )
         generated_code = response.text
         input_tokens = response.usage_metadata.prompt_token_count
@@ -247,38 +250,22 @@ def generate_code_with_model(prompt: str, model_name: str, temperature: float =
         "tokens_per_sec": tokens_per_sec
     }
 
-def single_model_generation(prompt: str, model: str, temperature: float, language: str, framework: str, context_window: int):
-    """Generate code with selected model"""
+def single_model_generation(prompt: str, model: str):
+    """Generate code with selected model - pure prompt evaluation"""
 
     if not prompt.strip():
         return "Please enter a project description."
 
-    #
-
-    if language.strip():
-        enhanced_prompt = f"Generate {language} code"
-        if framework.strip():
-            enhanced_prompt += f" using {framework}"
-        enhanced_prompt += f" for the following project:\n\n{prompt}"
-
-    # Add context window info to prompt
-    enhanced_prompt += f"\n\nNote: Keep response within {context_window} tokens."
-
-    result = generate_code_with_model(enhanced_prompt, model, temperature)
-
-    lang_info = f"{language}" if language.strip() else "Auto-detected"
-    if framework.strip():
-        lang_info += f" + {framework}"
+    # Use prompt directly - let AI decide everything from instructions
+    result = generate_code_with_model(prompt, model, temperature=0.7)
 
     output = f"""# Generated Code: {model}
 
 **Generation Time:** {result['elapsed_time']:.2f}s
-**Language/Framework:** {lang_info}
 **Lines of Code:** {result['loc']}
 **Tokens:** {result['input_tokens']} in → {result['output_tokens']} out
 **Speed:** {result['tokens_per_sec']:.0f} tokens/sec
 **Cost:** ${result['cost']:.4f}
-**Context Window:** {context_window} tokens
 
 ---
 
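With the extra parameters gone, the entry point can be exercised outside Gradio. A minimal smoke-test sketch; the model key is illustrative and must match an actual MODEL_CONFIGS entry, and the relevant API key must be set in the environment:

# Hypothetical smoke test for the simplified entry point.
if __name__ == "__main__":
    report = single_model_generation(
        "Create a REST API in Rust using Axum for a blog with users and posts. "
        "Include JWT authentication, PostgreSQL database, unit tests, and Docker deployment.",
        "Claude Sonnet 4.5",  # illustrative; the real keys include an emoji suffix
    )
    print(report)  # Markdown report: generation time, LOC, tokens, speed, cost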
@@ -287,26 +274,17 @@ def single_model_generation(prompt: str, model: str, temperature: float, language: str, framework: str, context_window: int):
 
     return output
 
-def multi_model_comparison(prompt: str, temperature: float, language: str, framework: str, context_window: int):
-    """Compare all models on same prompt"""
+def multi_model_comparison(prompt: str):
+    """Compare all models on same prompt - pure prompt evaluation"""
 
     if not prompt.strip():
         return pd.DataFrame(), "Please enter a project description."
 
-    #
-    enhanced_prompt = prompt
-    if language.strip():
-        enhanced_prompt = f"Generate {language} code"
-        if framework.strip():
-            enhanced_prompt += f" using {framework}"
-        enhanced_prompt += f" for: {prompt}"
-
-    enhanced_prompt += f"\n\nNote: Keep response within {context_window} tokens."
-
+    # Use prompt directly - let AI decide everything from instructions
     results = []
 
     for model_name in MODEL_CONFIGS.keys():
-        result = generate_code_with_model(enhanced_prompt, model_name, temperature)
+        result = generate_code_with_model(prompt, model_name, temperature=0.7)
 
         results.append({
             "Model": model_name,
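The comparison path can be driven headlessly the same way. A sketch assuming the (DataFrame, message) return shape, which the early-return line above and the outputs=[comparison_table, winner_msg] wiring below both imply:

# Sketch: run the comparison outside Gradio; requires all provider API keys.
df, winner = multi_model_comparison(
    "Create a minimal REST API for a TODO list with create, read, update, "
    "delete operations. Use best practices and include tests."
)
print(winner)
print(df.to_string(index=False))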
@@ -355,15 +333,21 @@ with gr.Blocks(
     """)
 
     with gr.Tab("🚀 Single Model Generation"):
-        gr.Markdown("
+        gr.Markdown("""
+        Generate production-ready code with your choice of AI model.
+
+        **Pure prompt evaluation:** Describe your requirements in detail. The AI will decide language, framework, and architecture based on your instructions.
+
+        **Context Window:** 200,000 tokens output
+        """)
 
         with gr.Row():
             with gr.Column(scale=1):
                 prompt_input = gr.Textbox(
                     label="Project Description",
-                    placeholder="Example: Create a REST API for a blog with users and posts. Include JWT authentication, PostgreSQL database, and Docker deployment.",
-                    lines=
-                    value="Create a
+                    placeholder="Example: Create a REST API in Rust using Axum for a blog with users and posts. Include JWT authentication, PostgreSQL database, unit tests, and Docker deployment with multi-stage build.",
+                    lines=10,
+                    value="Create a minimal REST API for a TODO list with create, read, update, delete operations. Use best practices and include tests."
                 )
 
                 model_select = gr.Dropdown(
@@ -373,31 +357,6 @@ with gr.Blocks(
                     info="Select the model to generate code"
                 )
 
-                with gr.Row():
-                    language_input = gr.Textbox(
-                        label="Language (Optional)",
-                        placeholder="e.g., Rust, Python, TypeScript, Go, Java - Leave empty for AI to decide",
-                        value=""
-                    )
-                    framework_input = gr.Textbox(
-                        label="Framework (Optional)",
-                        placeholder="e.g., Axum, FastAPI, Express, Django - Leave empty for AI to decide",
-                        value=""
-                    )
-
-                with gr.Row():
-                    temp_slider = gr.Slider(
-                        0.0, 1.0, 0.5,
-                        label="Temperature",
-                        info="Higher = more creative, Lower = more deterministic"
-                    )
-                    context_slider = gr.Slider(
-                        1000, 8000, 4000,
-                        step=500,
-                        label="Context Window (tokens)",
-                        info="Maximum tokens in response"
-                    )
-
                 generate_btn = gr.Button("Generate Code", variant="primary", size="lg")
 
             with gr.Column(scale=2):
@@ -408,56 +367,37 @@ with gr.Blocks(
 
         generate_btn.click(
             single_model_generation,
-            inputs=[prompt_input, model_select
+            inputs=[prompt_input, model_select],
             outputs=output_single
         )
 
         gr.Examples(
             examples=[
-                ["Create a REST API for a blog with users and posts", "Claude Sonnet 4.5
-                ["Build a CLI tool for file encryption using AES-256", "GPT-4o 💎"
-                ["Implement a rate limiter middleware for web APIs", "Llama 3.3 70B (Groq) 🚀"
+                ["Create a REST API in Rust using Axum for a blog with users and posts. Include JWT authentication, PostgreSQL database, unit tests, and Docker deployment.", "Claude Sonnet 4.5 ���"],
+                ["Build a CLI tool in Python for file encryption using AES-256 with Click framework. Include progress bars and error handling.", "GPT-4o 💎"],
+                ["Implement a rate limiter middleware in TypeScript for Express web APIs. Support Redis backend and configurable limits per endpoint.", "Llama 3.3 70B (Groq) 🚀"],
             ],
-            inputs=[prompt_input, model_select
+            inputs=[prompt_input, model_select]
         )
 
     with gr.Tab("⚡ Multi-Model Comparison"):
-        gr.Markdown("
+        gr.Markdown("""
+        Compare all 6 AI models side-by-side on the same task.
+
+        **Pure prompt evaluation:** Each model reads the same instructions and decides implementation details independently.
+
+        **Context Window:** 200,000 tokens output per model
+        """)
 
         with gr.Row():
             with gr.Column(scale=1):
                 prompt_compare = gr.Textbox(
                     label="Project Description (tested on ALL models)",
-                    placeholder="Create a
-                    lines=
-                    value="Create a minimal REST API for a TODO list with create, read, update, delete operations."
+                    placeholder="Example: Create a REST API in Python using FastAPI for a TODO list with create, read, update, delete operations. Include SQLAlchemy models, Pydantic schemas, and basic tests.",
+                    lines=8,
+                    value="Create a minimal REST API for a TODO list with create, read, update, delete operations. Use best practices and include tests."
                 )
 
-                with gr.Row():
-                    language_compare = gr.Textbox(
-                        label="Language (Optional)",
-                        placeholder="e.g., Python, Rust, TypeScript - Leave empty for AI to decide",
-                        value=""
-                    )
-                    framework_compare = gr.Textbox(
-                        label="Framework (Optional)",
-                        placeholder="e.g., FastAPI, Axum, Express - Leave empty for AI to decide",
-                        value=""
-                    )
-
-                with gr.Row():
-                    temp_compare = gr.Slider(
-                        0.0, 1.0, 0.5,
-                        label="Temperature",
-                        info="Higher = more creative, Lower = more deterministic"
-                    )
-                    context_compare = gr.Slider(
-                        1000, 8000, 4000,
-                        step=500,
-                        label="Context Window (tokens)",
-                        info="Maximum tokens in response"
-                    )
-
                 compare_btn = gr.Button("Compare All Models", variant="primary", size="lg")
 
             with gr.Column(scale=2):
@@ -469,7 +409,7 @@ with gr.Blocks(
 
         compare_btn.click(
             multi_model_comparison,
-            inputs=[prompt_compare
+            inputs=[prompt_compare],
             outputs=[comparison_table, winner_msg]
         )
 