# kimi-tester / app.py
# nlopes90's picture
# Initial kimi-tester MCP server space
# 88c4f98
import gradio as gr
from huggingface_hub import InferenceClient
# Single inference client created at import time; every tool function in this
# module sends its chat-completion request through it.
client = InferenceClient(model="moonshotai/Kimi-K2-Instruct")
# System prompt sent as the first message of every request. It casts the model
# as an adversarial senior test engineer and lists the frameworks, techniques,
# and coverage areas its answers must include. The fragments are joined with a
# single space to form one paragraph.
TESTER_SYSTEM = " ".join(
    [
        "You are a senior test engineer who is adversarial by nature.",
        "Your job is to BREAK code — find the inputs that cause failures,",
        "the edge cases nobody thought of, the race conditions that hide in production.",
        "You write tests using pytest (Python) or vitest (TypeScript/JavaScript).",
        "You always use: fixtures, parametrize, mocking, and property-based testing where appropriate.",
        "You cover: happy path, error path, boundary values, type coercion,",
        "concurrency, empty inputs, None/null, Unicode, large inputs, and negative values.",
        "Every test has a clear name that describes WHAT it tests and WHY.",
    ]
)
def generate_tests(
    code: str,
    framework: str = "pytest",
    max_tokens: int = 2048,
) -> str:
    """
    Generate a comprehensive test suite for the given code.
    Raises ValueError when no code is supplied (empty, whitespace-only, or None).

    Args:
        code: The source code to test. Can be a function, class, or module.
        framework: Test framework to use: "pytest" (Python) or "vitest" (TypeScript).
            Default is "pytest".
        max_tokens: Maximum tokens for test generation (default 2048).

    Returns:
        Complete test file with imports, fixtures, and test cases.
    """
    # Fail fast instead of spending an inference call on an empty code fence.
    # NOTE(review): a cleared UI field may arrive as None rather than "" —
    # both are rejected here.
    if not code or not code.strip():
        raise ValueError("code must be a non-empty string containing the source to test")

    prompt = (
        f"Generate a comprehensive {framework} test suite for this code:\n\n"
        f"```\n{code}\n```\n\n"
        "Requirements:\n"
        "- Use fixtures for setup/teardown\n"
        "- Use parametrize for multiple input combinations\n"
        "- Cover: happy path, error cases, boundary values, edge cases\n"
        "- Mock external dependencies if any\n"
        "- Include docstrings explaining what each test validates\n"
        "- Return ONLY the test file, ready to run"
    )
    response = client.chat_completion(
        messages=[
            {"role": "system", "content": TESTER_SYSTEM},
            {"role": "user", "content": prompt},
        ],
        max_tokens=max_tokens,
    )
    # The message content may be absent; normalise to "" so the declared
    # ``-> str`` return type always holds.
    return response.choices[0].message.content or ""
def find_edge_cases(
    code: str,
    max_tokens: int = 2048,
) -> str:
    """
    Analyze code and identify edge cases, boundary conditions, and failure modes.
    Think like an attacker trying to break the code.
    Raises ValueError when no code is supplied (empty, whitespace-only, or None).

    Args:
        code: The source code to analyze for edge cases.
        max_tokens: Maximum tokens for the analysis (default 2048).

    Returns:
        Categorized list of edge cases with example inputs that would trigger them.
    """
    # Fail fast instead of spending an inference call on an empty code fence.
    # NOTE(review): a cleared UI field may arrive as None rather than "" —
    # both are rejected here.
    if not code or not code.strip():
        raise ValueError("code must be a non-empty string containing the source to analyze")

    prompt = (
        "Analyze this code for edge cases and failure modes:\n\n"
        f"```\n{code}\n```\n\n"
        "For each edge case found:\n"
        "1. **Category**: (boundary, type, concurrency, resource, security, logic)\n"
        "2. **Description**: What the edge case is\n"
        "3. **Trigger input**: The exact input that would expose it\n"
        "4. **Expected behavior**: What SHOULD happen\n"
        "5. **Actual behavior**: What WOULD happen (bug or correct)\n"
        "6. **Severity**: CRITICAL / HIGH / MEDIUM / LOW\n\n"
        "Be adversarial. Think like a fuzzer. Find at least 8 edge cases."
    )
    response = client.chat_completion(
        messages=[
            {"role": "system", "content": TESTER_SYSTEM},
            {"role": "user", "content": prompt},
        ],
        max_tokens=max_tokens,
    )
    # The message content may be absent; normalise to "" so the declared
    # ``-> str`` return type always holds.
    return response.choices[0].message.content or ""
def generate_integration_tests(
    api_spec: str,
    framework: str = "pytest + httpx",
    max_tokens: int = 2048,
) -> str:
    """
    Generate integration tests for API endpoints (FastAPI or Next.js API routes).
    Raises ValueError when no API spec is supplied (empty, whitespace-only, or None).

    Args:
        api_spec: API specification — can be endpoint code, OpenAPI spec,
            or a description of endpoints with methods, paths, and expected behavior.
        framework: Test framework: "pytest + httpx" (FastAPI) or "vitest + fetch"
            (Next.js). Default is "pytest + httpx".
        max_tokens: Maximum tokens for test generation (default 2048).

    Returns:
        Integration test file with setup, teardown, and test cases for each endpoint.
    """
    # Fail fast instead of spending an inference call on an empty spec.
    # NOTE(review): a cleared UI field may arrive as None rather than "" —
    # both are rejected here.
    if not api_spec or not api_spec.strip():
        raise ValueError("api_spec must be a non-empty string describing the API to test")

    prompt = (
        f"Generate integration tests ({framework}) for this API:\n\n"
        f"```\n{api_spec}\n```\n\n"
        "Requirements:\n"
        "- Test each endpoint: success, validation errors, auth errors, not found\n"
        "- Use async test client (httpx.AsyncClient for FastAPI)\n"
        "- Test request/response schemas match\n"
        "- Test error response format consistency\n"
        "- Include setup/teardown for test data\n"
        "- Test rate limiting if applicable\n"
        "- Return ONLY the test file, ready to run"
    )
    response = client.chat_completion(
        messages=[
            {"role": "system", "content": TESTER_SYSTEM},
            {"role": "user", "content": prompt},
        ],
        max_tokens=max_tokens,
    )
    # The message content may be absent; normalise to "" so the declared
    # ``-> str`` return type always holds.
    return response.choices[0].message.content or ""
def test_strategy(
    requirements: str,
    stack: str = "Python/FastAPI + Next.js",
    max_tokens: int = 2048,
) -> str:
    """
    Create a comprehensive test strategy and plan for a feature or project.
    Raises ValueError when no requirements are supplied (empty, whitespace-only, or None).

    Args:
        requirements: Project or feature requirements to create a test plan for.
        stack: Tech stack context (default "Python/FastAPI + Next.js").
        max_tokens: Maximum tokens for the response (default 2048).

    Returns:
        Test strategy with coverage matrix, test types, priorities, and tooling.
    """
    # Fail fast instead of spending an inference call on empty requirements.
    # NOTE(review): a cleared UI field may arrive as None rather than "" —
    # both are rejected here.
    if not requirements or not requirements.strip():
        raise ValueError("requirements must be a non-empty description of the feature or project")

    prompt = (
        f"Create a test strategy for:\n\n{requirements}\n\n"
        f"Stack: {stack}\n\n"
        "Include:\n"
        "1. **Test Pyramid**: Unit / Integration / E2E split with counts\n"
        "2. **Coverage Matrix**: Table of features × test types\n"
        "3. **Critical Paths**: What MUST be tested (ranked by risk)\n"
        "4. **Edge Cases**: Top 10 edge cases to cover\n"
        "5. **Tooling**: Frameworks, mocking libraries, CI setup\n"
        "6. **Test Data**: What fixtures/factories are needed\n"
        "7. **Automation**: What can be auto-generated vs hand-written"
    )
    response = client.chat_completion(
        messages=[
            {"role": "system", "content": TESTER_SYSTEM},
            {"role": "user", "content": prompt},
        ],
        max_tokens=max_tokens,
    )
    # The message content may be absent; normalise to "" so the declared
    # ``-> str`` return type always holds.
    return response.choices[0].message.content or ""
def _max_tokens_slider():
    """Return a fresh "Max Tokens" slider (256-4096 in steps of 256, default 2048)."""
    return gr.Slider(minimum=256, maximum=4096, value=2048, step=256, label="Max Tokens")


# One tab per tool function. Each gr.Interface maps its inputs positionally
# onto the wrapped function's parameters and publishes it under ``api_name``.
demo = gr.TabbedInterface(
    [
        gr.Interface(
            fn=generate_tests,
            inputs=[
                gr.Code(label="Code to Test", language="python", lines=15),
                gr.Dropdown(choices=["pytest", "vitest"], value="pytest", label="Framework"),
                _max_tokens_slider(),
            ],
            outputs=gr.Code(label="Test Suite"),
            title="Generate Tests",
            api_name="generate_tests",
        ),
        gr.Interface(
            fn=find_edge_cases,
            inputs=[
                gr.Code(label="Code to Analyze", language="python", lines=15),
                # Expose max_tokens here too: find_edge_cases accepts it, and
                # every other tab already lets the caller adjust it.
                _max_tokens_slider(),
            ],
            outputs=gr.Textbox(label="Edge Cases", lines=20),
            title="Find Edge Cases",
            api_name="find_edge_cases",
        ),
        gr.Interface(
            fn=generate_integration_tests,
            inputs=[
                gr.Code(label="API Spec / Endpoint Code", language="python", lines=15),
                gr.Dropdown(
                    choices=["pytest + httpx", "vitest + fetch"],
                    value="pytest + httpx",
                    label="Framework",
                ),
                _max_tokens_slider(),
            ],
            outputs=gr.Code(label="Integration Tests"),
            title="Integration Tests",
            api_name="generate_integration_tests",
        ),
        gr.Interface(
            fn=test_strategy,
            inputs=[
                gr.Textbox(label="Requirements", placeholder="Describe the feature or project...", lines=6),
                gr.Textbox(label="Stack", value="Python/FastAPI + Next.js"),
                _max_tokens_slider(),
            ],
            outputs=gr.Textbox(label="Test Strategy", lines=20),
            title="Test Strategy",
            api_name="test_strategy",
        ),
    ],
    ["Generate", "Edge Cases", "Integration", "Strategy"],
    title="Kimi Tester (MCP)",
)

if __name__ == "__main__":
    # Launch only when run as a script; the MCP server is enabled and
    # server-side rendering is disabled via the two launch flags.
    demo.launch(mcp_server=True, ssr_mode=False)