Spaces:

lablab-ai-amd-developer-hackathon
/

ROCmPort-AI

Running

File size: 13,105 Bytes

1a6672d
a5be23e
1a6672d
a5be23e
 
 
 
 
 
 
 
 
 
 
 
 
 
1a6672d
 
 
 
a5be23e
1a6672d
a5be23e
1a6672d
 
3de7600
1a6672d
 
a5be23e
3de7600
1a6672d
 
 
3de7600
1a6672d
a5be23e
1a6672d
 
a5be23e
1a6672d
a5be23e
1a6672d
 
 
 
a5be23e
1a6672d
a5be23e
 
 
28263c0
a5be23e
 
 
 
 
 
 
 
28263c0
 
 
 
 
a5be23e
28263c0
 
 
 
1a6672d
 
 
0b5416e
 
 
a5be23e
 
 
 
 
 
 
 
 
 
 
1a6672d
 
 
 
a5be23e
1a6672d
 
a5be23e
 
 
 
 
1a6672d
 
a5be23e
 
1a6672d
a5be23e
 
1a6672d
 
 
a5be23e
 
 
 
 
 
 
 
 
1a6672d
a5be23e
 
 
 
 
1a6672d
 
a5be23e
1a6672d
a5be23e
1a6672d
 
a5be23e
 
 
 
 
 
 
 
 
 
 
 
 
1a6672d
 
a5be23e
 
 
 
 
1a6672d
 
 
a5be23e
 
 
 
1a6672d
 
a5be23e
1a6672d
 
a5be23e
 
 
 
 
 
1a6672d
 
a5be23e
 
 
 
 
1a6672d
 
 
a5be23e
 
 
 
 
1a6672d
 
a5be23e
1a6672d
 
 
a5be23e
 
 
 
 
 
1a6672d
 
 
 
a5be23e
 
 
 
 
 
 
1a6672d
 
 
a5be23e
 
 
 
1a6672d
 
a5be23e
 
 
 
 
 
1a6672d
 
 
 
 
 
 
a5be23e
1a6672d
 
a5be23e
1a6672d
 
a5be23e
 
 
 
 
 
 
1a6672d
a5be23e
 
 
 
 
1a6672d
 
 
a5be23e
 
 
 
 
1a6672d
 
a5be23e
1a6672d
 
 
 
 
 
 
 
 
 
a5be23e
1a6672d
 
 
 
 
a5be23e
1a6672d
 
a5be23e
1a6672d
a5be23e
 
1a6672d
 
 
a5be23e
1a6672d
 
a5be23e
1a6672d
a5be23e
1a6672d
a5be23e
28263c0
 
 
 
a5be23e
 
28263c0
 
 
 
 
a5be23e
984e3c2
 
28263c0
 
1a6672d
 
 
 
 
a5be23e
 
1a6672d
 
 
 
 
a5be23e
1a6672d
a5be23e
984e3c2
 
1a6672d
 
 
 
 
 
a5be23e
1a6672d
 
 
 
 
 
a5be23e
 
 
1a6672d
a5be23e

import asyncio
import json
from typing import AsyncGenerator

# pylint: disable=broad-exception-caught

from . import analyzer, optimizer, tester, translator
from ..models import (
    AgentEvent,
    AgentStatus,
    AnalyzerResult,
    CostEstimate,
    FinalReport,
    OptimizerResult,
    TesterResult,
    TranslatorResult,
    WorkloadType,
)


def calculate_cost_estimate(analyzer_result: AnalyzerResult) -> CostEstimate:
    """Build a rough manual-porting cost estimate from the analyzer's complexity score.

    A falsy ``complexity_score`` (``None`` or ``0``) is treated as 5. The score
    selects one of three tiers:

    * ``<= 3``  -> "1-2 weeks",  factor "Low"
    * ``<= 7``  -> "3-6 weeks",  factor "Medium"
    * otherwise -> "6-10 weeks", factor "High"

    Args:
        analyzer_result: Analyzer output; only ``complexity_score`` is read.

    Returns:
        A ``CostEstimate`` whose ``estimated_savings`` is a text range of
        engineer-days (5x to 10x the score) and whose ``rocmport_minutes`` is
        always "Varies by kernel".
    """
    score = analyzer_result.complexity_score or 5

    if score <= 3:
        weeks, tier = "1-2 weeks", "Low"
    elif score <= 7:
        weeks, tier = "3-6 weeks", "Medium"
    else:
        weeks, tier = "6-10 weeks", "High"

    # The savings wording is the same template for every tier.
    savings_text = f"~{score * 5}-{score * 10} eng-days × team rate (complexity {score}/10)"

    return CostEstimate(
        manual_porting_weeks=weeks,
        rocmport_minutes="Varies by kernel",
        estimated_savings=savings_text,
        complexity_factor=tier,
    )


def simplify_explanation(report: FinalReport) -> str:
    """Return the report's AMD-advantage text reworded in plainer language.

    Reads ``report.amd_advantage_explanation`` and applies a fixed list of
    literal, case-sensitive substring substitutions. The report itself is not
    modified.

    Args:
        report: Object exposing an ``amd_advantage_explanation`` string.

    Returns:
        The explanation text after every substitution has been applied.
    """
    # Order matters: each rule operates on the output of the ones before it.
    substitutions = (
        # Hardware figures and jargon.
        ("5.3 TB/s memory bandwidth", "much faster memory access"),
        ("3.35 TB/s", "slower memory access"),
        ("memory-bound", "needs to move a lot of data"),
        ("compute-bound", "does a lot of calculations"),
        ("wavefront", "group of threads working together"),
        ("shared memory tiling", "shares data between threads efficiently"),
        ("coalescing", "accesses memory in order"),
        ("optimization", "improvement"),
        ("performance", "speed"),
        ("benchmark", "test"),
        ("iteration", "try"),
        # Phrase-level softening.
        ("This kernel is", "This code is"),
        # By this point every "optimization" has already been rewritten above,
        # so this rule cannot match; it is kept to mirror the rule list as-is.
        ("The optimization", "The improvement"),
        ("achieves", "gets"),
        ("demonstrates", "shows"),
    )

    text = report.amd_advantage_explanation
    for technical, plain in substitutions:
        text = text.replace(technical, plain)
    return text


# NOTE: run_pipeline below is NOT used by the active LangGraph pipeline.
# The active pipeline is backend/graph/pipeline.py (build_pipeline / pipeline).
# This function is kept for reference but is dead code.
async def run_pipeline(
    cuda_code: str,
    kernel_name: str = "custom",
    simple_mode: bool = False,
) -> AsyncGenerator[AgentEvent, None]:
    """Run analyzer -> translator -> optimizer -> tester and stream progress events.

    Each stage's ``run`` function is executed via ``asyncio.to_thread``. A
    RUNNING event is yielded before a stage starts and a DONE or FAILED event
    after it finishes. Any exception raised by a stage is reported as a FAILED
    event (with ``str(exc)`` as the detail) and ends the stream.

    If the first tester run reports ``speedup < 1.0``, the optimizer is run a
    second time with the tester's notes as feedback and the result is tested
    again. A tester result with ``success`` false ends the stream with a FAILED
    event on either iteration.

    On success the last event comes from the "coordinator" agent and carries
    the JSON-serialized ``FinalReport`` in ``detail``.

    Args:
        cuda_code: CUDA source handed to the analyzer and the translator.
        kernel_name: Forwarded unchanged to ``tester.run``.
        simple_mode: Not read by this function; kept so existing callers that
            pass it continue to work.

    Yields:
        AgentEvent: Progress, failure, and completion events in pipeline order.
    """
    # ---- Stage 1: analysis -------------------------------------------------
    yield AgentEvent(
        agent="analyzer",
        status=AgentStatus.RUNNING,
        message="Scanning CUDA code for kernels, APIs, and hardware-specific issues...",
    )

    try:
        analyzer_result: AnalyzerResult = await asyncio.to_thread(analyzer.run, cuda_code)
    except Exception as e:
        yield AgentEvent(agent="analyzer", status=AgentStatus.FAILED, message="Analysis failed", detail=str(e))
        return

    detail_parts = [
        f"Found {len(analyzer_result.kernels_found)} kernel(s): {', '.join(analyzer_result.kernels_found)}",
        f"Workload: {analyzer_result.workload_type.value}",
        f"Difficulty: {analyzer_result.difficulty} - {analyzer_result.difficulty_reason}",
    ]

    # Optional findings are appended only when the analyzer reported them.
    if analyzer_result.warp_size_issue:
        detail_parts.append(
            f"WARP SIZE ISSUE: {analyzer_result.warp_size_detail}")
    if analyzer_result.sharding_detected:
        detail_parts.append(
            "Multi-GPU sharding detected; review if needed on MI300X memory capacity.")
    if analyzer_result.prediction:
        detail_parts.append(analyzer_result.prediction)

    yield AgentEvent(
        agent="analyzer",
        status=AgentStatus.DONE,
        message=(
            f"Found {len(analyzer_result.kernels_found)} kernel(s) | "
            f"{analyzer_result.workload_type.value} workload | Difficulty: {analyzer_result.difficulty}"
        ),
        detail="\n".join(detail_parts),
    )

    # ---- Stage 2: translation ----------------------------------------------
    yield AgentEvent(
        agent="translator",
        status=AgentStatus.RUNNING,
        message="Running hipify-clang (pass 1) then LLM correction (pass 2)...",
    )

    try:
        translator_result: TranslatorResult = await asyncio.to_thread(translator.run, cuda_code, analyzer_result)
    except Exception as e:
        yield AgentEvent(agent="translator", status=AgentStatus.FAILED, message="Translation failed", detail=str(e))
        return

    yield AgentEvent(
        agent="translator",
        status=AgentStatus.DONE,
        message=(
            f"{translator_result.total_changes} changes "
            f"({translator_result.hipify_changes} hipify + {translator_result.llm_changes} LLM)"
        ),
        detail=(
            f"Total changes: {translator_result.total_changes} "
            f"({translator_result.hipify_changes} hipify, {translator_result.llm_changes} LLM)\n"
            f"Warp size corrected: {analyzer_result.warp_size_issue}\n"
            "Kernel launch syntax updated"
        ),
    )

    # ---- Stage 3: optimization (iteration 1) -------------------------------
    yield AgentEvent(
        agent="optimizer",
        status=AgentStatus.RUNNING,
        message="Applying AMD MI300X-specific optimizations (iteration 1)...",
    )

    try:
        optimizer_result: OptimizerResult = await asyncio.to_thread(
            optimizer.run,
            translator_result.hip_code,
            analyzer_result,
            1,
        )
    except Exception as e:
        yield AgentEvent(agent="optimizer", status=AgentStatus.FAILED, message="Optimization failed", detail=str(e))
        return

    yield AgentEvent(
        agent="optimizer",
        status=AgentStatus.DONE,
        message=f"{len(optimizer_result.changes)} optimization(s) applied",
        detail="\n".join(
            f"- {c['description']}" for c in optimizer_result.changes),
    )

    # ---- Stage 4: test / profile (iteration 1) -----------------------------
    yield AgentEvent(
        agent="tester",
        status=AgentStatus.RUNNING,
        message="Compiling with hipcc and profiling with rocprof (iteration 1)...",
    )

    try:
        tester_result_1: TesterResult = await asyncio.to_thread(
            tester.run,
            optimizer_result.optimized_code,
            analyzer_result,
            1,
            kernel_name,
        )
    except Exception as e:
        yield AgentEvent(agent="tester", status=AgentStatus.FAILED, message="Testing failed", detail=str(e))
        return

    if not tester_result_1.success:
        yield AgentEvent(
            agent="tester",
            status=AgentStatus.FAILED,
            message="Compilation or profiling failed",
            detail=tester_result_1.notes,
        )
        return

    if tester_result_1.speedup < 1.0:
        # ---- Regression: retry once with profiler feedback -----------------
        yield AgentEvent(
            agent="tester",
            status=AgentStatus.FAILED,
            message=f"Iteration 1: {tester_result_1.speedup}x vs baseline HIP (regression)",
            detail=(
                f"Bandwidth utilized: {tester_result_1.bandwidth_utilized}%\n"
                f"{tester_result_1.notes}"
            ),
        )

        yield AgentEvent(
            agent="coordinator",
            status=AgentStatus.RUNNING,
            message="Performance regressed, retrying optimizer with profiler feedback...",
            detail=f"Profiler feedback: {tester_result_1.notes}",
        )

        yield AgentEvent(
            agent="optimizer",
            status=AgentStatus.RETRYING,
            message="Trying alternative optimization strategy (iteration 2)...",
            detail=f"Previous strategy regressed. Feedback: {tester_result_1.notes}",
        )

        try:
            # The retry starts again from the translated HIP code, not from the
            # regressed iteration-1 output.
            optimizer_result_2: OptimizerResult = await asyncio.to_thread(
                optimizer.run,
                translator_result.hip_code,
                analyzer_result,
                2,
                tester_result_1.notes,
            )
        except Exception as e:
            yield AgentEvent(agent="optimizer", status=AgentStatus.FAILED, message="Re-optimization failed", detail=str(e))
            return

        yield AgentEvent(
            agent="optimizer",
            status=AgentStatus.DONE,
            message=f"Alternative strategy: {len(optimizer_result_2.changes)} change(s) applied",
            detail="\n".join(
                f"- {c['description']}" for c in optimizer_result_2.changes),
        )

        yield AgentEvent(
            agent="tester",
            status=AgentStatus.RUNNING,
            message="Re-profiling with alternative optimization (iteration 2)...",
        )

        try:
            tester_result_final: TesterResult = await asyncio.to_thread(
                tester.run,
                optimizer_result_2.optimized_code,
                analyzer_result,
                2,
                kernel_name,
            )
        except Exception as e:
            yield AgentEvent(agent="tester", status=AgentStatus.FAILED, message="Re-testing failed", detail=str(e))
            return

        # Apply the same success gate as iteration 1; without it a failed
        # re-test would fall through and be reported as a completed migration.
        if not tester_result_final.success:
            yield AgentEvent(
                agent="tester",
                status=AgentStatus.FAILED,
                message="Compilation or profiling failed",
                detail=tester_result_final.notes,
            )
            return

        final_optimizer = optimizer_result_2
    else:
        tester_result_final = tester_result_1
        final_optimizer = optimizer_result

    yield AgentEvent(
        agent="tester",
        status=AgentStatus.DONE,
        message=f"Iteration {tester_result_final.iteration}: {tester_result_final.speedup}x vs baseline HIP",
        detail=(
            f"Execution time: {tester_result_final.execution_ms:.1f}ms\n"
            f"Memory bandwidth: {tester_result_final.bandwidth_utilized:.1f}% utilized\n"
            f"Bottleneck type: {tester_result_final.bottleneck}\n"
            f"{tester_result_final.notes}"
        ),
    )

    # ---- Stage 5: final report ---------------------------------------------
    yield AgentEvent(agent="coordinator", status=AgentStatus.RUNNING, message="Generating migration report...")

    amd_explanation = _build_amd_explanation(
        analyzer_result, tester_result_final)

    try:
        cost_estimate = calculate_cost_estimate(analyzer_result)
    except Exception:
        # Best effort: a cost-estimate failure must not sink the whole report,
        # so fall back to a fixed mid-range estimate.
        cost_estimate = CostEstimate(
            manual_porting_weeks="3-6 weeks",
            rocmport_minutes="Varies by kernel",
            estimated_savings="$20,000-$50,000",
            complexity_factor="Medium",
        )

    # Fields common to the interim report (input to simplify_explanation) and
    # the final report; defined once so the two cannot drift apart.
    shared_fields = dict(
        migration_success=True,
        speedup=tester_result_final.speedup,
        bandwidth_utilized=tester_result_final.bandwidth_utilized,
        total_changes=translator_result.total_changes +
        len(final_optimizer.changes),
        bottleneck=tester_result_final.bottleneck,
        amd_advantage_explanation=amd_explanation,
        iterations=tester_result_final.iteration,
        hip_code=translator_result.hip_code,
        optimized_code=final_optimizer.optimized_code,
        verification=tester_result_final.verification,
        static_risk_report=analyzer_result.static_risk_report,
        data_source=tester_result_final.data_source or "simulated",
    )
    simplified_explanation = simplify_explanation(FinalReport(**shared_fields))

    report = FinalReport(
        **shared_fields,
        cost_estimate=cost_estimate,
        simplified_explanation=simplified_explanation,
    )

    yield AgentEvent(
        agent="coordinator",
        status=AgentStatus.DONE,
        message="Migration complete",
        detail=json.dumps(report.model_dump()),
    )


def _build_amd_explanation(analyzer_result: AnalyzerResult, tester_result: TesterResult) -> str:
    """Return the AMD-advantage paragraph for the final report.

    Args:
        analyzer_result: Analyzer output; only ``workload_type`` is read.
        tester_result: Tester output; ``bandwidth_utilized`` is read only for
            memory-bound workloads.

    Returns:
        For ``WorkloadType.MEMORY_BOUND``, text that includes the bandwidth
        utilization rounded to a whole percent. For every other workload type,
        a fixed compute-bound explanation.
    """
    memory_bound = analyzer_result.workload_type == WorkloadType.MEMORY_BOUND

    # Any workload that is not memory-bound gets the compute-bound wording.
    if not memory_bound:
        return (
            "This is a compute-bound kernel; launch geometry and wavefront-aware tuning are key drivers. "
            "After optimization, compute utilization and execution characteristics improved."
        )

    utilization = f"{tester_result.bandwidth_utilized:.0f}"
    return (
        "This is a memory-bound kernel; performance scales with memory bandwidth. "
        "MI300X provides higher memory bandwidth than H100-class hardware, and this workload "
        f"reached {utilization}% utilization after optimization."
    )