AION Protocol Development committed on
Commit
b883a41
·
1 Parent(s): ce725ca

feat: Add Qwen2.5-Coder-32B and Phi-4-mini via HF Inference API + limit to 32K tokens

Browse files

- Added TIER 5: FREE HUGGINGFACE MODELS
- Qwen2.5-Coder-32B-Instruct (32B code specialist)
- Phi-4-mini-instruct (Microsoft efficient model)
- Changed max_tokens from 64000 to 32000 (user request - fix 400 error)
- Updated context_window to 32000 in MODEL_CONFIGS
- Updated UI text: 64,000 → 32,000 tokens
- HuggingFace Inference API provider already implemented
- Models auto-appear in dropdown via list(MODEL_CONFIGS.keys())

Files changed (1) hide show
  1. app.py +27 -6
app.py CHANGED
@@ -21,7 +21,7 @@ MODEL_CONFIGS = {
21
  "model": "claude-sonnet-4-20250514",
22
  "api_key_env": "ANTHROPIC_API_KEY",
23
  "cost_per_1M_tokens": 3.00,
24
- "context_window": 64000,
25
  "tier": "premium",
26
  "description": "Best for complex architecture"
27
  },
@@ -73,6 +73,27 @@ MODEL_CONFIGS = {
73
  "context_window": 1000000,
74
  "tier": "free-google",
75
  "description": "Experimental - Ultra-fast generation (1M context)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  }
77
  }
78
 
@@ -94,7 +115,7 @@ OUTPUT FORMAT:
94
  3. Dockerfile (if deployment mentioned)
95
  4. Brief README with usage instructions
96
 
97
- Context window: 64,000 tokens output (demo limit) - you can generate comprehensive solutions.
98
 
99
  Be complete and thorough. Focus on quality and production-readiness."""
100
 
@@ -122,7 +143,7 @@ def generate_code_with_model(prompt: str, model_name: str, temperature: float =
122
  client = anthropic.Anthropic(api_key=os.getenv(config["api_key_env"]))
123
  response = client.messages.create(
124
  model=config["model"],
125
- max_tokens=64000, # Limited for demo stability
126
  temperature=temperature,
127
  system=SYSTEM_PROMPT,
128
  messages=[{"role": "user", "content": prompt}]
@@ -170,7 +191,7 @@ def generate_code_with_model(prompt: str, model_name: str, temperature: float =
170
  model = genai.GenerativeModel(config["model"])
171
  response = model.generate_content(
172
  f"{SYSTEM_PROMPT}\n\nUser request: {prompt}",
173
- generation_config={"temperature": temperature, "max_output_tokens": 64000} # Gemini 2.0 Flash supports up to 8K (65536 is max for SDK)
174
  )
175
  generated_code = response.text
176
  input_tokens = response.usage_metadata.prompt_token_count
@@ -338,7 +359,7 @@ with gr.Blocks(
338
 
339
  **Pure prompt evaluation:** Describe your requirements in detail. The AI will decide language, framework, and architecture based on your instructions.
340
 
341
- **Context Window:** 64,000 tokens output
342
  """)
343
 
344
  with gr.Row():
@@ -386,7 +407,7 @@ with gr.Blocks(
386
 
387
  **Pure prompt evaluation:** Each model reads the same instructions and decides implementation details independently.
388
 
389
- **Context Window:** 64,000 tokens output per model
390
  """)
391
 
392
  with gr.Row():
 
21
  "model": "claude-sonnet-4-20250514",
22
  "api_key_env": "ANTHROPIC_API_KEY",
23
  "cost_per_1M_tokens": 3.00,
24
+ "context_window": 32000,
25
  "tier": "premium",
26
  "description": "Best for complex architecture"
27
  },
 
73
  "context_window": 1000000,
74
  "tier": "free-google",
75
  "description": "Experimental - Ultra-fast generation (1M context)"
76
+ },
77
+
78
+
79
+ # === TIER 5: FREE HUGGINGFACE MODELS ===
80
+ "Qwen2.5-Coder-32B 🤗": {
81
+ "provider": "huggingface",
82
+ "model": "Qwen/Qwen2.5-Coder-32B-Instruct",
83
+ "api_key_env": "HF_TOKEN",
84
+ "cost_per_1M_tokens": 0.00,
85
+ "context_window": 32768,
86
+ "tier": "free-hf",
87
+ "description": "32B code specialist via HF Inference API (FREE)"
88
+ },
89
+ "Phi-4-mini 🤗": {
90
+ "provider": "huggingface",
91
+ "model": "microsoft/Phi-4-mini-instruct",
92
+ "api_key_env": "HF_TOKEN",
93
+ "cost_per_1M_tokens": 0.00,
94
+ "context_window": 16384,
95
+ "tier": "free-hf",
96
+ "description": "Microsoft's efficient code model via HF Inference API"
97
  }
98
  }
99
 
 
115
  3. Dockerfile (if deployment mentioned)
116
  4. Brief README with usage instructions
117
 
118
+ Context window: 32,000 tokens output (demo limit) - you can generate comprehensive solutions.
119
 
120
  Be complete and thorough. Focus on quality and production-readiness."""
121
 
 
143
  client = anthropic.Anthropic(api_key=os.getenv(config["api_key_env"]))
144
  response = client.messages.create(
145
  model=config["model"],
146
+ max_tokens=32000, # Limited for demo stability
147
  temperature=temperature,
148
  system=SYSTEM_PROMPT,
149
  messages=[{"role": "user", "content": prompt}]
 
191
  model = genai.GenerativeModel(config["model"])
192
  response = model.generate_content(
193
  f"{SYSTEM_PROMPT}\n\nUser request: {prompt}",
194
+ generation_config={"temperature": temperature, "max_output_tokens": 32000} # Gemini 2.0 Flash SDK allows up to 65536; capped at 32000 for demo stability
195
  )
196
  generated_code = response.text
197
  input_tokens = response.usage_metadata.prompt_token_count
 
359
 
360
  **Pure prompt evaluation:** Describe your requirements in detail. The AI will decide language, framework, and architecture based on your instructions.
361
 
362
+ **Context Window:** 32,000 tokens output
363
  """)
364
 
365
  with gr.Row():
 
407
 
408
  **Pure prompt evaluation:** Each model reads the same instructions and decides implementation details independently.
409
 
410
+ **Context Window:** 32,000 tokens output per model
411
  """)
412
 
413
  with gr.Row():