File size: 14,673 Bytes
a5be23e
 
 
 
485813e
a5be23e
 
 
 
 
1a6672d
 
 
5504db3
1a6672d
28263c0
 
 
 
 
 
1a6672d
 
 
a5be23e
1a6672d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
984e3c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a6672d
 
 
 
 
485813e
 
1a6672d
 
 
 
485813e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a6672d
485813e
1a6672d
485813e
0b5416e
 
 
 
 
485813e
 
 
 
28263c0
485813e
1a6672d
 
 
 
 
 
 
485813e
1a6672d
 
 
a5be23e
485813e
a5be23e
 
 
485813e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a5be23e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a6672d
 
 
 
 
 
 
 
 
a5be23e
1a6672d
 
a5be23e
1a6672d
 
 
 
 
 
 
 
 
 
 
a5be23e
1a6672d
 
a5be23e
1a6672d
 
 
 
a5be23e
1a6672d
a5be23e
 
1a6672d
 
 
 
 
 
 
 
 
 
5504db3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a6672d
 
 
 
 
 
 
 
 
 
 
 
a5be23e
1a6672d
5504db3
a5be23e
5504db3
a5be23e
1a6672d
 
 
 
a5be23e
 
1a6672d
a5be23e
7e7728f
 
1a6672d
a5be23e
 
1a6672d
 
 
 
 
 
 
 
 
a5be23e
1a6672d
 
 
 
243b15a
 
 
0b5416e
 
1a6672d
a5be23e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
# pylint: disable=broad-exception-caught

from backend.agents.analyzer import AnalyzerResult, WorkloadType
from backend.agents.tester import run as run_tester
from backend.graph.pipeline import pipeline as migration_pipeline
from backend.models import PortRequest, ColdStartRequest, AggregateMetricsRequest
from fastapi.staticfiles import StaticFiles
from fastapi.responses import StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
from fastapi import FastAPI, HTTPException
import json
import asyncio
import zipfile
import io
import os
import difflib
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()


# Application object. The title, description, version, contact and license
# values below are passed to FastAPI as API metadata.
app = FastAPI(
    title="ROCmPort AI",
    description="CUDA-to-ROCm migration assistant with iterative testing and optimization.",
    version="1.0.0",
    contact={
        "name": "Tazwar Ahnaf Enan",
        "url": "https://github.com/tazwaryayyyy",
        "email": "tazwardevp@gmail.com",
    },
    license_info={
        "name": "MIT",
    },
)

# CORS is configured fully open: every origin, method and header is allowed.
# NOTE(review): confirm a wildcard origin is intended outside of demo use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/health")
async def health():
    """Return a liveness payload with LLM provider info and the ROCm flag.

    ``rocm_available`` is True only when the ``ROCM_AVAILABLE`` environment
    variable equals "true" (case-insensitive); it defaults to False.
    """
    from backend.agents.analyzer import llm_client

    payload = {
        "status": "ok",
        "service": "ROCmPort AI",
        "llm_provider": llm_client.get_model_info(),
    }
    rocm_flag = os.environ.get("ROCM_AVAILABLE", "false")
    payload["rocm_available"] = rocm_flag.lower() == "true"
    return payload


@app.get("/benchmark-report")
async def benchmark_report():
    """
    Returns a fully auditable benchmark report with:
    - Per-kernel deterministic performance data (data_source labelled)
    - Static risk scan results for each demo kernel
    - Hardware context and reproducibility instructions
    - LLM provider information

    Judges can use this endpoint to audit every metric shown in the UI.

    The base report comes from ``get_benchmark_summary()``; this handler adds
    two keys to it: ``static_risk_scans`` (one entry per ``*.cu`` file in the
    ``demo_kernels`` directory, keyed by file name without the extension) and
    ``llm_provider``.
    """
    from backend.tools.demo_artifacts import get_benchmark_summary
    from backend.tools import static_analyzer
    from backend.agents.analyzer import llm_client

    kernels_dir = os.path.join(os.path.dirname(__file__), "demo_kernels")
    summary = get_benchmark_summary()

    # Attach static risk scan for each demo kernel. The listing is sorted so
    # the report has the same key order on every call and every filesystem.
    kernel_risk_scans = {}
    for fname in sorted(os.listdir(kernels_dir)):
        if not fname.endswith(".cu"):
            continue
        # Strip only the trailing extension; str.replace would also remove
        # ".cu" from the middle of a name such as "foo.cuda.cu".
        kname = fname.removesuffix(".cu")
        with open(os.path.join(kernels_dir, fname), encoding="utf-8") as f:
            cuda_code = f.read()
        report = static_analyzer.scan(cuda_code)
        kernel_risk_scans[kname] = {
            "critical_count": report.critical_count,
            "high_count": report.high_count,
            "medium_count": report.medium_count,
            "scan_duration_ms": report.scan_duration_ms,
            "items": [item.model_dump() for item in report.items],
        }

    summary["static_risk_scans"] = kernel_risk_scans
    summary["llm_provider"] = llm_client.get_model_info()

    return summary


@app.post("/port")
async def port_cuda_code(req: PortRequest):
    """
    Main endpoint. Streams SSE events as the LangGraph pipeline runs.
    Each event is a JSON object matching the AgentEvent schema.

    The pipeline runs in a background task that pushes its events onto a
    queue; the response generator drains that queue and forwards each event
    as one ``data: <json>`` frame. The stream always ends with a
    ``data: [DONE]`` frame. If the pipeline raises, or produces no event
    within the idle timeout, a coordinator event with status "failed" is sent
    first so the client can tell an aborted run from a completed one.

    Raises:
        HTTPException: 400 when ``cuda_code`` is empty or shorter than 10
            characters after stripping whitespace.
    """
    if not req.cuda_code or len(req.cuda_code.strip()) < 10:
        raise HTTPException(status_code=400, detail="No CUDA code provided")

    # Longest gap tolerated between two pipeline events before giving up.
    idle_timeout_s = 120.0
    queue: asyncio.Queue = asyncio.Queue()

    async def _run_graph():
        """Run the pipeline and push every emitted event onto the queue."""
        initial_state = {
            "cuda_code": req.cuda_code,
            "kernel_name": req.kernel_name or "custom",
            "simple_mode": req.simple_mode or False,
            "analyzer_result": None,
            "translator_result": None,
            "optimizer_result": None,
            "tester_result": None,
            "iteration": 0,
            "max_iterations": 3,
            "should_retry": False,
            "migration_success": False,
            "final_report": {},
            "events": [],
        }
        try:
            async for chunk in migration_pipeline.astream(
                initial_state, stream_mode="updates"
            ):
                for _node_name, node_output in chunk.items():
                    for event in node_output.get("events", []):
                        await queue.put(event)
                        await asyncio.sleep(0.05)  # let client breathe
        except Exception as exc:
            # Surface the failure to the client as a regular event instead of
            # silently closing the stream.
            await queue.put(
                {
                    "agent": "coordinator",
                    "status": "failed",
                    "message": "Pipeline error",
                    "detail": str(exc),
                }
            )
        finally:
            await queue.put(None)  # sentinel

    async def event_stream():
        """Yield SSE frames until the sentinel arrives or the run stalls."""
        task = asyncio.create_task(_run_graph())
        try:
            while True:
                try:
                    event = await asyncio.wait_for(
                        queue.get(), timeout=idle_timeout_s)
                except asyncio.TimeoutError:
                    # Tell the client why the stream is ending; a bare [DONE]
                    # would be indistinguishable from a successful run.
                    timeout_event = {
                        "agent": "coordinator",
                        "status": "failed",
                        "message": "Pipeline timeout",
                        "detail": (
                            "No pipeline event received within "
                            f"{idle_timeout_s:.0f} seconds"
                        ),
                    }
                    yield f"data: {json.dumps(timeout_event)}\n\n"
                    yield "data: [DONE]\n\n"
                    break
                if event is None:
                    yield "data: [DONE]\n\n"
                    break
                yield f"data: {json.dumps(event)}\n\n"
        finally:
            # Stop the pipeline when the stream ends for any reason,
            # including the client disconnecting.
            task.cancel()

    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",
        },
    )


async def _collect_pipeline_events(cuda_code: str, kernel_name: str, simple_mode: bool = False) -> tuple[list[dict], dict | None]:
    """Run the migration pipeline to completion and gather what it emitted.

    Returns:
        ``(events, final_report)``. ``events`` holds every event in emission
        order. ``final_report`` is the JSON-decoded ``detail`` of the latest
        coordinator event with status "done" and a non-empty detail; it is
        None when no such event was seen or the latest one failed to decode.
    """
    state = dict(
        cuda_code=cuda_code,
        kernel_name=kernel_name,
        simple_mode=simple_mode,
        analyzer_result=None,
        translator_result=None,
        optimizer_result=None,
        tester_result=None,
        iteration=0,
        max_iterations=3,
        should_retry=False,
        migration_success=False,
        final_report={},
        events=[],
    )

    collected: list[dict] = []
    report = None

    async for update in migration_pipeline.astream(state, stream_mode="updates"):
        for node_output in update.values():
            for evt in node_output.get("events", []):
                collected.append(evt)

                carries_report = (
                    evt.get("agent") == "coordinator"
                    and evt.get("status") == "done"
                    and evt.get("detail")
                )
                if not carries_report:
                    continue

                # A detail that cannot be decoded resets the report to None.
                try:
                    report = json.loads(evt["detail"])
                except (json.JSONDecodeError, TypeError):
                    report = None

    return collected, report


def _has_adaptation_loop(events: list[dict]) -> bool:
    """Return True when the run shows retry-based adaptation behavior."""
    saw_regression = any(
        e.get("agent") == "tester" and e.get(
            "status") == "failed" and "regression" in str(e.get("message", "")).lower()
        for e in events
    )
    saw_retry = any(
        e.get("agent") == "optimizer" and e.get("status") == "retrying"
        for e in events
    )
    return saw_regression and saw_retry


@app.post("/cold-start")
async def cold_start_run(req: ColdStartRequest):
    """
    Run the pipeline once on arbitrary pasted CUDA code.

    Responds with the complete event trace, the final report, and summary
    trust signals (event count and whether an adaptation loop was observed).
    Rejects input shorter than 10 characters after stripping with a 400, and
    answers 500 when the pipeline finishes without a final report.
    """
    source = req.cuda_code
    if not source or len(source.strip()) < 10:
        raise HTTPException(status_code=400, detail="No CUDA code provided")

    kernel_label = req.kernel_name or "unknown_input"
    events, report = await _collect_pipeline_events(source, kernel_label, False)

    if report is None:
        raise HTTPException(
            status_code=500, detail="Pipeline completed without final report")

    response = {
        "success": True,
        "kernel_name": kernel_label,
        "adaptation_loop_observed": _has_adaptation_loop(events),
        "event_count": len(events),
        "report": report,
        "events": events,
    }
    return response


@app.post("/aggregate-metric")
async def aggregate_metric(req: AggregateMetricsRequest):
    """
    Evaluate multiple kernels and return one aggregate metric:
    average speedup vs baseline HIP.

    Kernels are the ``*.cu`` files in the ``demo_kernels`` directory, named
    by file name without the extension. When ``req.kernel_names`` is empty,
    all of them are run in sorted order; otherwise only the requested names
    that exist are run, in the requested order. A kernel whose run yields no
    final report is skipped, and a missing or falsy ``speedup`` in a report
    counts as 0.0.

    Raises:
        HTTPException: 400 when no requested kernel exists; 500 when no run
            produced a final report.
    """
    kernels_dir = os.path.join(os.path.dirname(__file__), "demo_kernels")
    requested = req.kernel_names or []

    available: dict[str, str] = {}
    for fname in sorted(os.listdir(kernels_dir)):
        if not fname.endswith(".cu"):
            continue
        # Strip only the trailing extension; str.replace would also remove
        # ".cu" from the middle of a name such as "foo.cuda.cu".
        kname = fname.removesuffix(".cu")
        with open(os.path.join(kernels_dir, fname), encoding="utf-8") as f:
            available[kname] = f.read()

    selected_names = requested if requested else sorted(available)
    selected_names = [name for name in selected_names if name in available]

    if not selected_names:
        raise HTTPException(
            status_code=400, detail="No valid kernels selected for aggregation")

    runs = []
    speedups = []

    for name in selected_names:
        events, report = await _collect_pipeline_events(available[name], name, False)
        if report is None:
            continue

        speedup = float(report.get("speedup", 0.0) or 0.0)
        speedups.append(speedup)
        runs.append({
            "kernel": name,
            "speedup": speedup,
            "adaptation_loop_observed": _has_adaptation_loop(events),
            "iterations": report.get("iterations", 1),
        })

    if not speedups:
        raise HTTPException(
            status_code=500, detail="Unable to produce aggregate metric from selected kernels")

    avg_speedup = round(sum(speedups) / len(speedups), 3)
    # Expressed relative to a 1.0x baseline, e.g. 1.25x -> 25.0 percent.
    avg_improvement_pct = round((avg_speedup - 1.0) * 100.0, 2)

    return {
        "success": True,
        "baseline": "straight hipify output with minimal compile edits",
        "kernel_count": len(speedups),
        "aggregate_metric": {
            "average_speedup_vs_baseline": avg_speedup,
            "average_improvement_percent": avg_improvement_pct,
        },
        "runs": runs,
    }


@app.post("/recompile")
async def recompile_edited_code(req: dict):
    """
    Recompile endpoint for human override feature.
    Accepts edited HIP code and re-runs tester.

    Reads ``edited_code`` and the optional ``kernel_name`` (default "custom")
    from the request body and returns ``{"success": True, "result": ...}``
    with the dumped tester result.

    Raises:
        HTTPException: 400 when ``edited_code`` is missing, not a string, or
            shorter than 10 characters after stripping; 500 when the tester
            run fails for any other reason.
    """
    try:
        edited_code = req.get("edited_code")
        kernel_name = req.get("kernel_name", "custom")

        # Reject non-string payloads here; calling .strip() on them would
        # otherwise surface as a 500 instead of a client error.
        if not isinstance(edited_code, str) or len(edited_code.strip()) < 10:
            raise HTTPException(status_code=400, detail="No HIP code provided")

        # Create a mock analyzer result for testing
        analyzer_result = AnalyzerResult(
            kernels_found=["test_kernel"],
            cuda_apis=["hipMalloc", "hipMemcpy"],
            warp_size_issue=False,
            warp_size_detail=None,
            workload_type=WorkloadType.MEMORY_BOUND,
            sharding_detected=False,
            difficulty="Easy",
            difficulty_reason="Simple test kernel"
        )

        # Run tester with edited code in a worker thread so the event loop is
        # not blocked. The positional 2 is forwarded as-is; its meaning is
        # defined by run_tester.
        tester_result = await asyncio.to_thread(run_tester, edited_code, analyzer_result, 2, kernel_name)

        return {
            "success": True,
            "result": tester_result.model_dump()
        }

    except HTTPException:
        # Let deliberate HTTP errors (the 400 above) through unchanged; the
        # generic handler below would otherwise rewrap them as a 500.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Recompilation failed: {str(e)}") from e


@app.post("/export")
async def export_migration_package(req: dict):
    """
    Build the downloadable migration bundle for the GitHub PR simulation.

    The response is a zip archive holding a unified diff, both source
    snapshots, and a markdown summary of the migration report. Answers 400
    when no ROCm code is available and 500 on any other failure.
    """
    try:
        report_data = req.get("migration_report", {})
        if not isinstance(report_data, dict):
            report_data = {}

        cuda_source = str(req.get("original_cuda") or "")
        # When the frontend omits final_rocm, use the report's optimized_code.
        rocm_source = str(
            req.get("final_rocm") or report_data.get("optimized_code") or "")

        if not rocm_source.strip():
            raise HTTPException(
                status_code=400, detail="No ROCm code provided for export")

        # Unified diff between the original CUDA and the final ROCm source.
        diff_lines = difflib.unified_diff(
            cuda_source.splitlines(keepends=True),
            rocm_source.splitlines(keepends=True),
            fromfile="original.cu",
            tofile="optimized.hip",
        )
        diff_text = "".join(diff_lines)

        # Markdown rendering of the migration report.
        md_report = f"""# ROCmPort AI Migration Report

## Performance Results
- Speedup: {report_data.get('speedup', 'N/A')}x
- Bandwidth Utilization: {report_data.get('bandwidth_utilized', 'N/A')}%
- Total Changes: {report_data.get('total_changes', 'N/A')}

## AMD Advantage Explanation
{report_data.get('amd_advantage_explanation', 'N/A')}

## Cost Impact
{report_data.get('cost_estimate', 'N/A')}

Generated by ROCmPort AI.
"""

        archive = io.BytesIO()
        with zipfile.ZipFile(archive, 'w', zipfile.ZIP_DEFLATED) as zf:
            zf.writestr("migration.diff", diff_text)
            # Source snapshots make the change easier to review in a PR.
            zf.writestr("original.cu", cuda_source)
            zf.writestr("optimized.hip", rocm_source)
            zf.writestr("migration_report.md", md_report)

        from fastapi.responses import Response

        download_headers = {
            "Content-Disposition": "attachment; filename=rocmport_migration.zip"}
        return Response(
            content=archive.getvalue(),
            media_type="application/zip",
            headers=download_headers,
        )

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Export failed: {str(e)}") from e


@app.get("/demo-kernels")
async def list_demo_kernels():
    """Return the bundled demo CUDA kernels as a name -> source mapping.

    Every ``*.cu`` file in the ``demo_kernels`` directory is included, keyed
    by its file name without the extension, in sorted order.
    """
    kernels_dir = os.path.join(os.path.dirname(__file__), "demo_kernels")
    kernels = {}
    # Sorted so the response order does not depend on the filesystem.
    for fname in sorted(os.listdir(kernels_dir)):
        if not fname.endswith(".cu"):
            continue
        # Strip only the trailing extension; str.replace would also remove
        # ".cu" from the middle of a name such as "foo.cuda.cu".
        name = fname.removesuffix(".cu")
        with open(os.path.join(kernels_dir, fname), encoding="utf-8") as f:
            kernels[name] = f.read()
    return kernels


# Static frontend: prefer the built bundle in frontend/dist and otherwise
# serve the frontend source folder (development). Nothing is mounted when
# neither path exists.
frontend_root = os.path.join(os.path.dirname(__file__), "..", "frontend")
frontend_dist = os.path.join(frontend_root, "dist")
if os.path.exists(frontend_dist):
    frontend_path = frontend_dist
else:
    frontend_path = frontend_root
if os.path.exists(frontend_path):
    app.mount(
        "/",
        StaticFiles(directory=frontend_path, html=True),
        name="frontend",
    )