Spaces:
Sleeping
Sleeping
File size: 5,475 Bytes
152df72 c842ab7 4e099cb c842ab7 152df72 c842ab7 28b59e0 c842ab7 28b59e0 c842ab7 28b59e0 c842ab7 28b59e0 c842ab7 28b59e0 152df72 c842ab7 152df72 28b59e0 c842ab7 152df72 c842ab7 152df72 28b59e0 c842ab7 152df72 c842ab7 28b59e0 c842ab7 152df72 28b59e0 c842ab7 152df72 28b59e0 c842ab7 152df72 28b59e0 c842ab7 152df72 c842ab7 152df72 c842ab7 152df72 4e099cb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
"""
api_backend.py
FastAPI backend for flowchart-to-English processing. This API supports receiving
an image file, running YOLO-based detection to identify boxes and arrows, performing
OCR, and generating structured JSON + English summary of the flowchart.
Endpoints:
- POST /process-image: Accepts image input and returns structured flowchart data.
"""
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
import uvicorn
from PIL import Image
import io
import json
import base64
import os
# Create the FastAPI application instance.
app = FastAPI()

# CORS configuration for Hugging Face Spaces: wildcard plus the explicit
# Space domain, with methods/headers opened up for browser clients.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*", "https://venkatviswa-flowchart-to-text.hf.space"],
    allow_methods=["GET", "POST", "OPTIONS"],
    allow_headers=["*"],
    expose_headers=["*"],
    allow_credentials=True,
)
# Add a health check endpoint
@app.get("/")
async def health_check():
    """Liveness probe: report that the API process is up and serving."""
    payload = {"status": "ok", "message": "API is running"}
    return payload
@app.options("/process-image")
async def options_process_image():
    """Answer CORS preflight OPTIONS requests for /process-image with an empty body."""
    return {}
# Lazily-populated registries for the ML pipeline. They stay None until
# load_modules() runs on the first request, keeping startup lightweight.
# Setting the SKIP_MODEL_LOADING env var to "1" flags that models should
# not be loaded at all.
SKIP_MODEL_LOADING = os.getenv("SKIP_MODEL_LOADING", "0") == "1"

yolo_module = ocr_module = graph_module = summarizer_module = None
def load_modules():
    """Import the heavy ML modules on first use and cache them in module globals.

    Deferring these imports avoids loading models during application startup;
    the call is a no-op once the registries have been populated.
    """
    global yolo_module, ocr_module, graph_module, summarizer_module
    if yolo_module is not None:
        return  # already loaded on an earlier request

    # Deferred imports: these pull in model weights and are expensive.
    from yolo_module import run_yolo as yolo_run
    from ocr_module import extract_text as ocr_extract, count_elements, validate_structure
    from graph_module import map_arrows, build_flowchart_json
    from summarizer_module import summarize_flowchart

    yolo_module = {"run_yolo": yolo_run}
    ocr_module = {
        "extract_text": ocr_extract,
        "count_elements": count_elements,
        "validate_structure": validate_structure,
    }
    graph_module = {
        "map_arrows": map_arrows,
        "build_flowchart_json": build_flowchart_json,
    }
    summarizer_module = {"summarize_flowchart": summarize_flowchart}
@app.post("/process-image")
async def process_image(
    file: UploadFile = File(...),
    debug: str = Form("false")
):
    """
    Receives an uploaded flowchart image, performs object detection and OCR,
    constructs a structured flowchart JSON, and generates a plain-English summary.

    Args:
        file (UploadFile): Flowchart image file (.png, .jpg, .jpeg).
        debug (str): "true" to enable debug mode (includes OCR logs and YOLO preview).

    Returns:
        JSONResponse: Contains flowchart structure, summary, debug output,
        and optional base64-encoded YOLO overlay.
    """
    # Lazy-load the ML modules when the first request comes in.
    load_modules()

    debug_mode = debug.lower() == "true"
    debug_log = []
    if debug_mode:
        debug_log.append("π₯ Received file upload")
        print(f"π₯ File received: {file.filename}")

    # Convert uploaded file bytes to an RGB image.
    contents = await file.read()
    image = Image.open(io.BytesIO(contents)).convert("RGB")
    if debug_mode:
        debug_log.append("β Image converted to RGB")
        print("β Image converted to RGB")

    # YOLO detection for boxes and arrows.
    boxes, arrows, vis_debug = yolo_module["run_yolo"](image)
    if debug_mode:
        debug_log.append(f"π¦ Detected {len(boxes)} boxes, {len(arrows)} arrows")

    # Run OCR on each detected box region.
    for box in boxes:
        box["text"] = ocr_module["extract_text"](image, box["bbox"], debug=debug_mode)
        print(f"π OCR for {box['id']}: {box['text']}")
        if debug_mode:
            debug_log.append(f"π {box['id']}: {box['text']}")

    # Build the structured JSON graph from detected nodes and edges.
    flowchart_json = graph_module["build_flowchart_json"](boxes, arrows)
    print("π§ Flowchart JSON:", json.dumps(flowchart_json, indent=2))

    # Validate the structure against the detected element counts.
    structure_info = ocr_module["count_elements"](boxes, arrows, debug=debug_mode)
    validation = ocr_module["validate_structure"](
        flowchart_json,
        expected_boxes=structure_info["box_count"],
        expected_arrows=len(arrows),
        debug=debug_mode
    )
    if debug_mode:
        debug_log.append(f"π§Ύ Validation: {validation}")

    # Generate the plain-English summary of the flowchart.
    summary = summarizer_module["summarize_flowchart"](flowchart_json)
    print("π Summary:", summary)

    # Encode the YOLO debug overlay as base64 PNG (only in debug mode).
    yolo_vis = None
    if debug_mode and vis_debug:
        vis_io = io.BytesIO()
        vis_debug.save(vis_io, format="PNG")
        yolo_vis = base64.b64encode(vis_io.getvalue()).decode("utf-8")

    # Return the full response payload.
    return JSONResponse({
        "flowchart": flowchart_json,
        "summary": summary,
        "yolo_vis": yolo_vis,
        "debug": "\n".join(debug_log) if debug_mode else ""
    })
if __name__ == "__main__":
    # Serve the app with Uvicorn. The port is taken from the API_PORT
    # environment variable, defaulting to 7860 (the Hugging Face Spaces port).
    port = int(os.environ.get("API_PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=port)