# kimi-architect / app.py
# Initial kimi-architect MCP server space — author: nlopes90, commit 3ac0f66
import gradio as gr
from huggingface_hub import InferenceClient

# Shared serverless Inference API client for the Kimi-K2 instruct model;
# every tool function below routes its chat completions through it.
client = InferenceClient("moonshotai/Kimi-K2-Instruct")
# System prompt shared by all four tools: frames the model as an opinionated
# senior software architect for the Python/FastAPI + Next.js + LangGraph stack.
# NOTE: this string is sent verbatim to the model — do not edit casually.
ARCHITECT_SYSTEM = (
    "You are a senior software architect specializing in Python/FastAPI backends, "
    "Next.js/React frontends, and LangGraph/LangChain AI systems. "
    "You think in systems — data flow, component boundaries, and interfaces. "
    "You always consider: scalability, testability, separation of concerns, "
    "and developer experience. "
    "Output file trees when relevant. Use markdown tables for comparisons. "
    "Reference specific patterns: repository pattern, service layer, CQRS, "
    "event-driven architecture, App Router conventions, React Server Components. "
    "Be opinionated but justify your choices."
)
def analyze_design(
    requirements: str,
    stack: str = "Python/FastAPI + Next.js + LangGraph",
    max_tokens: int = 2048,
) -> str:
    """
    Turn a requirements description into a full architecture plan.

    The plan covers components, data flow, API design, file structure,
    tech decisions, and risks, as produced by the Kimi-K2 model under
    the shared architect system prompt.

    Args:
        requirements: Project requirements or feature description.
            Be as detailed as possible for better results.
        stack: Tech stack to use (default "Python/FastAPI + Next.js + LangGraph").
        max_tokens: Maximum tokens for the response (default 2048).

    Returns:
        Architecture plan with components, data flow, API design, and rationale.
    """
    # Assemble the user message up front so the API call stays readable.
    user_prompt = (
        f"Requirements:\n{requirements}\n\n"
        f"Stack: {stack}\n\n"
        "Produce an architecture plan covering:\n"
        "1. **Components**: List each service/module with its responsibility\n"
        "2. **Data Flow**: How data moves through the system\n"
        "3. **API Design**: Key endpoints with method, path, request/response\n"
        "4. **File Structure**: Directory tree for the project\n"
        "5. **Tech Decisions**: Libraries chosen and why\n"
        "6. **Risks**: What could go wrong and mitigations"
    )
    completion = client.chat_completion(
        messages=[
            {"role": "system", "content": ARCHITECT_SYSTEM},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=max_tokens,
    )
    return completion.choices[0].message.content
def suggest_patterns(
    problem: str,
    stack: str = "Python/FastAPI",
    max_tokens: int = 2048,
) -> str:
    """
    Recommend design patterns for a specific problem and stack.

    Args:
        problem: The design problem to solve (e.g. "handle complex business logic
            with multiple validation steps", "manage state across LangGraph nodes").
        stack: The technology stack context. One of: "Python/FastAPI",
            "Next.js/React", "LangGraph/LangChain", or a combination.
        max_tokens: Maximum tokens for the response (default 2048).

    Returns:
        Recommended patterns with code examples and trade-offs.
    """
    response = client.chat_completion(
        messages=[
            {"role": "system", "content": ARCHITECT_SYSTEM},
            {
                "role": "user",
                "content": (
                    f"Problem: {problem}\n"
                    f"Stack: {stack}\n\n"
                    "Recommend design patterns. For each pattern:\n"
                    "1. **Name** and brief description\n"
                    "2. **Why** it fits this problem\n"
                    "3. **Code example** (concise, idiomatic for the stack)\n"
                    "4. **Trade-offs** (when NOT to use it)\n\n"
                    "Suggest 2-3 patterns ranked by fit."
                ),
            },
        ],
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content
def plan_implementation(
    task: str,
    stack: str = "Python/FastAPI + Next.js + LangGraph",
    max_tokens: int = 2048,
) -> str:
    """
    Create a step-by-step implementation plan with file structure.

    Args:
        task: The feature or project to implement (e.g. "RAG pipeline with
            document upload, chunking, embedding, and retrieval via FastAPI").
        stack: Tech stack to use (default "Python/FastAPI + Next.js + LangGraph").
        max_tokens: Maximum tokens for the response (default 2048).

    Returns:
        Ordered implementation steps with file paths, dependencies, and code snippets.
    """
    # Build the prompt separately so the completion call stays compact.
    instructions = (
        f"Task: {task}\n"
        f"Stack: {stack}\n\n"
        "Create a step-by-step implementation plan:\n"
        "1. **File tree**: Complete directory structure\n"
        "2. **Dependencies**: All packages needed (requirements.txt / package.json)\n"
        "3. **Implementation order**: Numbered steps, each with:\n"
        "   - File to create/modify\n"
        "   - What to implement\n"
        "   - Key code snippet or interface\n"
        "4. **Integration points**: How components connect\n"
        "5. **Verification**: How to test each step works"
    )
    chat_messages = [
        {"role": "system", "content": ARCHITECT_SYSTEM},
        {"role": "user", "content": instructions},
    ]
    completion = client.chat_completion(messages=chat_messages, max_tokens=max_tokens)
    return completion.choices[0].message.content
def review_architecture(
    code: str,
    max_tokens: int = 2048,
) -> str:
    """
    Review existing code for architectural issues, coupling, and scalability.

    Args:
        code: The source code to review architecturally.
            Can be a single file or multiple files separated by file path comments.
        max_tokens: Maximum tokens for the response (default 2048).

    Returns:
        Architectural review with issues, recommendations, and refactoring suggestions.
    """
    review_request = (
        f"Review this code architecturally:\n\n{code}\n\n"
        "Analyze:\n"
        "1. **Coupling**: Are components too tightly coupled?\n"
        "2. **Cohesion**: Does each module have a single responsibility?\n"
        "3. **Scalability**: What breaks under load?\n"
        "4. **Testability**: Can each component be tested in isolation?\n"
        "5. **Patterns**: Are appropriate patterns used (or missing)?\n"
        "6. **Recommendations**: Specific refactoring steps, ordered by impact"
    )
    completion = client.chat_completion(
        messages=[
            {"role": "system", "content": ARCHITECT_SYSTEM},
            {"role": "user", "content": review_request},
        ],
        max_tokens=max_tokens,
    )
    return completion.choices[0].message.content
# Gradio UI: one tab per architect tool. With mcp_server=True (see launch
# below) each gr.Interface is also exposed as an MCP tool via its api_name.
# Fix: the Patterns and Review tabs now expose the Max Tokens slider, matching
# the other tabs — previously the UI silently pinned max_tokens at its default
# even though both functions accept it.
demo = gr.TabbedInterface(
    [
        gr.Interface(
            fn=analyze_design,
            inputs=[
                gr.Textbox(label="Requirements", placeholder="Describe the project or feature...", lines=6),
                gr.Textbox(label="Stack", value="Python/FastAPI + Next.js + LangGraph"),
                gr.Slider(minimum=256, maximum=4096, value=2048, step=256, label="Max Tokens"),
            ],
            outputs=gr.Textbox(label="Architecture Plan", lines=20),
            title="Analyze Design",
            api_name="analyze_design",
        ),
        gr.Interface(
            fn=suggest_patterns,
            inputs=[
                gr.Textbox(label="Problem", placeholder="Describe the design problem...", lines=4),
                gr.Textbox(label="Stack", value="Python/FastAPI"),
                gr.Slider(minimum=256, maximum=4096, value=2048, step=256, label="Max Tokens"),
            ],
            outputs=gr.Textbox(label="Pattern Recommendations", lines=20),
            title="Suggest Patterns",
            api_name="suggest_patterns",
        ),
        gr.Interface(
            fn=plan_implementation,
            inputs=[
                gr.Textbox(label="Task", placeholder="What to implement...", lines=4),
                gr.Textbox(label="Stack", value="Python/FastAPI + Next.js + LangGraph"),
                gr.Slider(minimum=256, maximum=4096, value=2048, step=256, label="Max Tokens"),
            ],
            outputs=gr.Textbox(label="Implementation Plan", lines=20),
            title="Plan Implementation",
            api_name="plan_implementation",
        ),
        gr.Interface(
            fn=review_architecture,
            inputs=[
                gr.Code(label="Code to Review", language="python", lines=20),
                gr.Slider(minimum=256, maximum=4096, value=2048, step=256, label="Max Tokens"),
            ],
            outputs=gr.Textbox(label="Architectural Review", lines=20),
            title="Review Architecture",
            api_name="review_architecture",
        ),
    ],
    ["Design", "Patterns", "Plan", "Review"],
    title="Kimi Architect (MCP)",
)
if __name__ == "__main__":
    # mcp_server=True exposes each interface's api_name as an MCP tool.
    # NOTE(review): ssr_mode=False disables server-side rendering — presumably
    # for Hugging Face Spaces compatibility; confirm before removing.
    demo.launch(mcp_server=True, ssr_mode=False)