File size: 5,475 Bytes
152df72
 
 
 
 
 
 
 
 
 
 
c842ab7
 
 
 
 
 
 
 
4e099cb
c842ab7
152df72
c842ab7
 
28b59e0
c842ab7
 
28b59e0
c842ab7
28b59e0
c842ab7
28b59e0
c842ab7
 
28b59e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152df72
c842ab7
152df72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28b59e0
 
 
c842ab7
 
 
 
 
 
 
152df72
c842ab7
 
 
 
 
 
152df72
28b59e0
c842ab7
 
 
152df72
c842ab7
28b59e0
c842ab7
 
 
 
152df72
28b59e0
c842ab7
 
152df72
28b59e0
 
c842ab7
 
 
 
 
 
 
 
152df72
28b59e0
c842ab7
 
152df72
c842ab7
 
 
 
 
 
152df72
c842ab7
 
 
 
 
 
 
 
 
152df72
4e099cb
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
"""
api_backend.py

FastAPI backend for flowchart-to-English processing. This API supports receiving
an image file, running YOLO-based detection to identify boxes and arrows, performing
OCR, and generating structured JSON + English summary of the flowchart.

Endpoints:
- GET /: Health check that reports the API is running.
- POST /process-image: Accepts image input and returns structured flowchart data.
"""

from fastapi import FastAPI, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
import uvicorn
from PIL import Image
import io
import json
import base64
import os

# Create the FastAPI application instance.
app = FastAPI()

# CORS settings used for the Hugging Face Spaces deployment.
# NOTE(review): "*" already matches every origin, so the explicit Space URL is
# redundant; the FastAPI CORS docs also advise against combining wildcard
# values with allow_credentials=True — confirm whether credentials are needed.
_CORS_OPTIONS = {
    "allow_origins": ["*", "https://venkatviswa-flowchart-to-text.hf.space"],
    "allow_credentials": True,
    "allow_methods": ["GET", "POST", "OPTIONS"],
    "allow_headers": ["*"],
    "expose_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)

# Health check route served at the API root.
@app.get("/")
async def health_check():
    """Return a fixed JSON payload confirming that the API is running."""
    payload = {"status": "ok", "message": "API is running"}
    return payload


@app.options("/process-image")
async def options_process_image():
    """Answer OPTIONS requests on /process-image with an empty JSON object."""
    empty_body = {}
    return empty_body


# The ML modules are imported lazily rather than at startup, to avoid startup
# issues.
# SKIP_MODEL_LOADING is True only when the environment variable is exactly "1".
# NOTE(review): nothing in the visible part of this file reads this flag —
# confirm whether another module depends on it.
SKIP_MODEL_LOADING = os.environ.get("SKIP_MODEL_LOADING", "0") == "1"

# Module-level slots for the lazily imported callables; None means "not loaded".
yolo_module = ocr_module = graph_module = summarizer_module = None

def load_modules():
    """Import the pipeline modules on first use and cache their entry points.

    Each module-level slot (``yolo_module``, ``ocr_module``, ``graph_module``,
    ``summarizer_module``) is filled with a dict mapping a function name to the
    imported callable. ``yolo_module`` doubles as the "already loaded" marker:
    when it is not None the call returns immediately without importing.
    """
    global yolo_module, ocr_module, graph_module, summarizer_module

    # Already populated by an earlier call: nothing to do.
    if yolo_module is not None:
        return

    # Deferred imports: executed on the first call, not at application startup.
    from yolo_module import run_yolo
    from ocr_module import extract_text, count_elements, validate_structure
    from graph_module import map_arrows, build_flowchart_json
    from summarizer_module import summarize_flowchart

    # The slots are assigned only after every import above has succeeded, so a
    # failed import leaves them all None and the next call tries again.
    yolo_module = dict(run_yolo=run_yolo)
    ocr_module = dict(
        extract_text=extract_text,
        count_elements=count_elements,
        validate_structure=validate_structure,
    )
    graph_module = dict(
        map_arrows=map_arrows,
        build_flowchart_json=build_flowchart_json,
    )
    summarizer_module = dict(summarize_flowchart=summarize_flowchart)


@app.post("/process-image")
async def process_image(
    file: UploadFile = File(...),
    debug: str = Form("false")
):
    """
    Receives an uploaded flowchart image, performs object detection and OCR,
    constructs a structured flowchart JSON, and generates a plain-English summary.

    Args:
        file (UploadFile): Flowchart image file (.png, .jpg, .jpeg).
        debug (str): "true" (case-insensitive) to enable debug mode, which collects
            a debug log and includes the YOLO preview image in the response.

    Returns:
        JSONResponse: On success, a JSON object with the keys "flowchart"
        (structured flowchart), "summary" (English summary), "yolo_vis"
        (base64-encoded PNG of the detection overlay, or None) and "debug"
        (newline-joined debug log, or "" when debug mode is off).
        If the upload cannot be decoded as an image, a 400 response with a
        "detail" message is returned instead.
    """
    # Lazy load modules when first request comes in
    load_modules()

    debug_mode = debug.lower() == "true"
    debug_log = []

    if debug_mode:
        debug_log.append("📥 Received file upload")
    print(f"📥 File received: {file.filename}")

    # Convert file bytes to an RGB image. An empty, truncated or non-image
    # upload is a client error, so answer 400 instead of letting the Pillow
    # exception surface as an unhandled 500.
    contents = await file.read()
    try:
        image = Image.open(io.BytesIO(contents)).convert("RGB")
    except (OSError, Image.DecompressionBombError) as exc:
        # UnidentifiedImageError is a subclass of OSError, so it is covered here.
        print(f"Could not decode uploaded image: {exc}")
        return JSONResponse(
            status_code=400,
            content={"detail": "Uploaded file is not a valid image."},
        )
    if debug_mode:
        debug_log.append("✅ Image converted to RGB")
    print("✅ Image converted to RGB")

    # NOTE(review): the detection, OCR and summarization calls below run
    # synchronously inside this async handler; if they are slow they will hold
    # up the event loop — consider offloading them to a thread pool.

    # YOLO detection for boxes and arrows; vis_debug is the optional overlay image.
    boxes, arrows, vis_debug = yolo_module["run_yolo"](image)
    if debug_mode:
        debug_log.append(f"📦 Detected {len(boxes)} boxes, {len(arrows)} arrows")

    # Run OCR on each detected box and store the text on the box itself.
    for box in boxes:
        box["text"] = ocr_module["extract_text"](image, box["bbox"], debug=debug_mode)
        print(f"🔍 OCR for {box['id']}: {box['text']}")
        if debug_mode:
            debug_log.append(f"🔍 {box['id']}: {box['text']}")

    # Build structured JSON from nodes and edges
    flowchart_json = graph_module["build_flowchart_json"](boxes, arrows)
    print("🧠 Flowchart JSON:", json.dumps(flowchart_json, indent=2))

    # Validate the structure against the detected element counts
    structure_info = ocr_module["count_elements"](boxes, arrows, debug=debug_mode)
    validation = ocr_module["validate_structure"](
        flowchart_json,
        expected_boxes=structure_info["box_count"],
        expected_arrows=len(arrows),
        debug=debug_mode
    )
    if debug_mode:
        debug_log.append(f"🧾 Validation: {validation}")

    # Generate plain-English summary
    summary = summarizer_module["summarize_flowchart"](flowchart_json)
    print("📝 Summary:", summary)

    # Encode the YOLO debug image as base64 PNG (only if debug enabled)
    yolo_vis = None
    if debug_mode and vis_debug:
        vis_io = io.BytesIO()
        vis_debug.save(vis_io, format="PNG")
        yolo_vis = base64.b64encode(vis_io.getvalue()).decode("utf-8")

    # Return full response
    return JSONResponse({
        "flowchart": flowchart_json,
        "summary": summary,
        "yolo_vis": yolo_vis,
        "debug": "\n".join(debug_log) if debug_mode else ""
    })


if __name__ == "__main__":
    # Start the app under Uvicorn on all interfaces. The port is taken from
    # the API_PORT environment variable and falls back to 7860 when unset.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=int(os.environ.get("API_PORT", "7860")),
    )