Spaces:

nlopes90
/

kimi-tester

Sleeping

App Files Files Community

kimi-tester / app.py

nlopes90

Initial kimi-tester MCP server space

88c4f98 2 months ago

raw

history blame contribute delete

9.09 kB

	import gradio as gr
	from huggingface_hub import InferenceClient

	# Module-level inference client bound to the Kimi K2 Instruct model; it is
	# created once at import time and reused for every chat completion request.
	client = InferenceClient(model="moonshotai/Kimi-K2-Instruct")

	# System prompt sent as the first message of every chat completion in this
	# module. The fragments are joined with single spaces, so none of them needs
	# to carry its own trailing separator.
	TESTER_SYSTEM = " ".join(
	    (
	        "You are a senior test engineer who is adversarial by nature.",
	        "Your job is to BREAK code — find the inputs that cause failures,",
	        "the edge cases nobody thought of, the race conditions that hide in production.",
	        "You write tests using pytest (Python) or vitest (TypeScript/JavaScript).",
	        "You always use: fixtures, parametrize, mocking, and property-based testing where appropriate.",
	        "You cover: happy path, error path, boundary values, type coercion,",
	        "concurrency, empty inputs, None/null, Unicode, large inputs, and negative values.",
	        "Every test has a clear name that describes WHAT it tests and WHY.",
	    )
	)


	def generate_tests(
	    code: str,
	    framework: str = "pytest",
	    max_tokens: int = 2048,
	) -> str:
	    """
	    Generate a comprehensive test suite for the given code.

	    Empty input is rejected with a gr.Error instead of being sent to the model.

	    Args:
	        code: The source code to test. Can be a function, class, or module.
	        framework: Test framework to use: "pytest" (Python) or "vitest" (TypeScript).
	            Default is "pytest".
	        max_tokens: Maximum tokens for test generation (default 2048).

	    Returns:
	        Complete test file with imports, fixtures, and test cases.
	    """
	    # An empty editor may arrive as "" or None. Interpolating that into the
	    # prompt would ask the model to test nothing (or the literal text "None"),
	    # so fail fast with a message the UI can display.
	    if not code or not code.strip():
	        raise gr.Error("Please provide the code to generate tests for.")

	    prompt = (
	        f"Generate a comprehensive {framework} test suite for this code:\n\n"
	        f"```\n{code}\n```\n\n"
	        "Requirements:\n"
	        "- Use fixtures for setup/teardown\n"
	        "- Use parametrize for multiple input combinations\n"
	        "- Cover: happy path, error cases, boundary values, edge cases\n"
	        "- Mock external dependencies if any\n"
	        "- Include docstrings explaining what each test validates\n"
	        "- Return ONLY the test file, ready to run"
	    )
	    response = client.chat_completion(
	        messages=[
	            {"role": "system", "content": TESTER_SYSTEM},
	            {"role": "user", "content": prompt},
	        ],
	        # API/MCP callers may deliver the slider value as a JSON float;
	        # the token limit is forwarded as an integer.
	        max_tokens=int(max_tokens),
	    )
	    # The message content is optional in the response; never return None
	    # from a function annotated to return str.
	    return response.choices[0].message.content or ""


	def find_edge_cases(
	    code: str,
	    max_tokens: int = 2048,
	) -> str:
	    """
	    Analyze code and identify edge cases, boundary conditions, and failure modes.
	    Think like an attacker trying to break the code.

	    Empty input is rejected with a gr.Error instead of being sent to the model.

	    Args:
	        code: The source code to analyze for edge cases.
	        max_tokens: Maximum tokens for the analysis (default 2048).

	    Returns:
	        Categorized list of edge cases with example inputs that would trigger them.
	    """
	    # An empty editor may arrive as "" or None. Interpolating that into the
	    # prompt would ask the model to analyze nothing (or the literal text
	    # "None"), so fail fast with a message the UI can display.
	    if not code or not code.strip():
	        raise gr.Error("Please provide the code to analyze.")

	    prompt = (
	        "Analyze this code for edge cases and failure modes:\n\n"
	        f"```\n{code}\n```\n\n"
	        "For each edge case found:\n"
	        "1. Category: (boundary, type, concurrency, resource, security, logic)\n"
	        "2. Description: What the edge case is\n"
	        "3. Trigger input: The exact input that would expose it\n"
	        "4. Expected behavior: What SHOULD happen\n"
	        "5. Actual behavior: What WOULD happen (bug or correct)\n"
	        "6. Severity: CRITICAL / HIGH / MEDIUM / LOW\n\n"
	        "Be adversarial. Think like a fuzzer. Find at least 8 edge cases."
	    )
	    response = client.chat_completion(
	        messages=[
	            {"role": "system", "content": TESTER_SYSTEM},
	            {"role": "user", "content": prompt},
	        ],
	        # API/MCP callers may deliver the value as a JSON float;
	        # the token limit is forwarded as an integer.
	        max_tokens=int(max_tokens),
	    )
	    # The message content is optional in the response; never return None
	    # from a function annotated to return str.
	    return response.choices[0].message.content or ""


	def generate_integration_tests(
	    api_spec: str,
	    framework: str = "pytest + httpx",
	    max_tokens: int = 2048,
	) -> str:
	    """
	    Generate integration tests for API endpoints (FastAPI or Next.js API routes).

	    Empty input is rejected with a gr.Error instead of being sent to the model.

	    Args:
	        api_spec: API specification — can be endpoint code, OpenAPI spec,
	            or a description of endpoints with methods, paths, and expected behavior.
	        framework: Test framework: "pytest + httpx" (FastAPI) or "vitest + fetch"
	            (Next.js). Default is "pytest + httpx".
	        max_tokens: Maximum tokens for test generation (default 2048).

	    Returns:
	        Integration test file with setup, teardown, and test cases for each endpoint.
	    """
	    # An empty editor may arrive as "" or None. Interpolating that into the
	    # prompt would ask the model to test nothing (or the literal text "None"),
	    # so fail fast with a message the UI can display.
	    if not api_spec or not api_spec.strip():
	        raise gr.Error("Please provide an API spec or endpoint code.")

	    prompt = (
	        f"Generate integration tests ({framework}) for this API:\n\n"
	        f"```\n{api_spec}\n```\n\n"
	        "Requirements:\n"
	        "- Test each endpoint: success, validation errors, auth errors, not found\n"
	        "- Use async test client (httpx.AsyncClient for FastAPI)\n"
	        "- Test request/response schemas match\n"
	        "- Test error response format consistency\n"
	        "- Include setup/teardown for test data\n"
	        "- Test rate limiting if applicable\n"
	        "- Return ONLY the test file, ready to run"
	    )
	    response = client.chat_completion(
	        messages=[
	            {"role": "system", "content": TESTER_SYSTEM},
	            {"role": "user", "content": prompt},
	        ],
	        # API/MCP callers may deliver the slider value as a JSON float;
	        # the token limit is forwarded as an integer.
	        max_tokens=int(max_tokens),
	    )
	    # The message content is optional in the response; never return None
	    # from a function annotated to return str.
	    return response.choices[0].message.content or ""


	def test_strategy(
	    requirements: str,
	    stack: str = "Python/FastAPI + Next.js",
	    max_tokens: int = 2048,
	) -> str:
	    """
	    Create a comprehensive test strategy and plan for a feature or project.

	    Empty requirements are rejected with a gr.Error instead of being sent to the model.

	    Args:
	        requirements: Project or feature requirements to create a test plan for.
	        stack: Tech stack context (default "Python/FastAPI + Next.js").
	        max_tokens: Maximum tokens for the response (default 2048).

	    Returns:
	        Test strategy with coverage matrix, test types, priorities, and tooling.
	    """
	    # An empty textbox may arrive as "" or None. Interpolating that into the
	    # prompt would ask the model to plan for nothing (or the literal text
	    # "None"), so fail fast with a message the UI can display.
	    if not requirements or not requirements.strip():
	        raise gr.Error("Please describe the feature or project requirements.")

	    prompt = (
	        f"Create a test strategy for:\n\n{requirements}\n\n"
	        f"Stack: {stack}\n\n"
	        "Include:\n"
	        "1. Test Pyramid: Unit / Integration / E2E split with counts\n"
	        "2. Coverage Matrix: Table of features × test types\n"
	        "3. Critical Paths: What MUST be tested (ranked by risk)\n"
	        "4. Edge Cases: Top 10 edge cases to cover\n"
	        "5. Tooling: Frameworks, mocking libraries, CI setup\n"
	        "6. Test Data: What fixtures/factories are needed\n"
	        "7. Automation: What can be auto-generated vs hand-written"
	    )
	    response = client.chat_completion(
	        messages=[
	            {"role": "system", "content": TESTER_SYSTEM},
	            {"role": "user", "content": prompt},
	        ],
	        # API/MCP callers may deliver the slider value as a JSON float;
	        # the token limit is forwarded as an integer.
	        max_tokens=int(max_tokens),
	    )
	    # The message content is optional in the response; never return None
	    # from a function annotated to return str.
	    return response.choices[0].message.content or ""


	def _max_tokens_slider() -> gr.Slider:
	    """Return a new "Max Tokens" slider; every tab gets its own instance."""
	    return gr.Slider(minimum=256, maximum=4096, value=2048, step=256, label="Max Tokens")


	# Four-tab UI, one tab per generator function. The tab order below must stay
	# aligned with the list of tab names passed as the second argument.
	demo = gr.TabbedInterface(
	    [
	        gr.Interface(
	            fn=generate_tests,
	            inputs=[
	                gr.Code(label="Code to Test", language="python", lines=15),
	                gr.Dropdown(choices=["pytest", "vitest"], value="pytest", label="Framework"),
	                _max_tokens_slider(),
	            ],
	            outputs=gr.Code(label="Test Suite"),
	            title="Generate Tests",
	            api_name="generate_tests",
	        ),
	        gr.Interface(
	            fn=find_edge_cases,
	            inputs=[
	                gr.Code(label="Code to Analyze", language="python", lines=15),
	                # find_edge_cases accepts max_tokens like the other three
	                # functions, so expose the same control on this tab too.
	                _max_tokens_slider(),
	            ],
	            outputs=gr.Textbox(label="Edge Cases", lines=20),
	            title="Find Edge Cases",
	            api_name="find_edge_cases",
	        ),
	        gr.Interface(
	            fn=generate_integration_tests,
	            inputs=[
	                gr.Code(label="API Spec / Endpoint Code", language="python", lines=15),
	                gr.Dropdown(
	                    choices=["pytest + httpx", "vitest + fetch"],
	                    value="pytest + httpx",
	                    label="Framework",
	                ),
	                _max_tokens_slider(),
	            ],
	            outputs=gr.Code(label="Integration Tests"),
	            title="Integration Tests",
	            api_name="generate_integration_tests",
	        ),
	        gr.Interface(
	            fn=test_strategy,
	            inputs=[
	                gr.Textbox(label="Requirements", placeholder="Describe the feature or project...", lines=6),
	                gr.Textbox(label="Stack", value="Python/FastAPI + Next.js"),
	                _max_tokens_slider(),
	            ],
	            outputs=gr.Textbox(label="Test Strategy", lines=20),
	            title="Test Strategy",
	            api_name="test_strategy",
	        ),
	    ],
	    ["Generate", "Edge Cases", "Integration", "Strategy"],
	    title="Kimi Tester (MCP)",
	)

	if __name__ == "__main__":
	    # Launch only when run as a script, so importing this module does not
	    # start a server. The flags request MCP serving and turn off SSR mode.
	    demo.launch(
	        ssr_mode=False,
	        mcp_server=True,
	    )