"""FastAPI service that converts a PDF (uploaded or fetched by URL) to markup."""

import base64
import html
import os
import re
import tempfile
import zlib
from pathlib import Path

import fitz  # PyMuPDF
import requests
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import HTMLResponse, FileResponse

app = FastAPI()

# Upper bound (seconds) for downloading a remote PDF, so an unresponsive
# server cannot keep a request handler waiting forever.
_DOWNLOAD_TIMEOUT_SECONDS = 30


def _to_byte(component):
    """Scale a 0..1 float colour component to an int clamped to 0..255."""
    return max(0, min(255, round(float(component) * 255)))


def _components_to_rgb(components):
    """Convert a sequence of 0..1 float components to an ``(r, g, b)`` int triple.

    One component is treated as gray, three as RGB and four as CMYK. Any
    other length falls back to black, the same default used for ``None``.
    """
    if len(components) == 3:
        return tuple(_to_byte(v) for v in components)
    if len(components) == 1:
        gray = _to_byte(components[0])
        return gray, gray, gray
    if len(components) == 4:
        c, m, y, k = components
        return tuple(_to_byte((1 - ink) * (1 - k)) for ink in (c, m, y))
    return 0, 0, 0


def get_color(color_int):
    """Return a CSS hex colour string for a PyMuPDF colour value.

    Args:
        color_int: ``None``, a packed ``0xRRGGBB`` integer (the form passed
            for text spans), or a tuple/list of float components in the
            0..1 range (the form ``Page.get_drawings()`` reports for
            ``"color"``). The original code only handled integers and
            raised ``TypeError`` on the tuple form.

    Returns:
        ``"#000"`` for ``None``; the short ``"#rgb"`` form when every
        channel is a multiple of 17 (i.e. both hex digits are equal);
        otherwise ``"#rrggbb"``.
    """
    if color_int is None:
        return "#000"
    if isinstance(color_int, (tuple, list)):
        r, g, b = _components_to_rgb(color_int)
    else:
        r, g, b = (color_int >> 16) & 0xFF, (color_int >> 8) & 0xFF, color_int & 0xFF
    if r % 17 == 0 and g % 17 == 0 and b % 17 == 0:
        return f"#{r // 17:x}{g // 17:x}{b // 17:x}"
    return f"#{r:02x}{g:02x}{b:02x}"


def _path_data(items):
    """Build a path-data string (M/L/H/V/Z commands) from drawing items.

    Only line (``"l"``) and rectangle (``"re"``) items are emitted; every
    other item type is skipped. Coordinates are truncated to integers.
    """
    chunks = []
    for item in items:
        kind = item[0]
        if kind == "l":
            start, end = item[1], item[2]
            chunks.append(f"M{int(start.x)} {int(start.y)}L{int(end.x)} {int(end.y)}")
        elif kind == "re":
            rect = item[1]
            chunks.append(
                f"M{int(rect.x0)} {int(rect.y0)}H{int(rect.x1)}V{int(rect.y1)}H{int(rect.x0)}Z"
            )
    return "".join(chunks)


def perform_conversion(pdf_path):
    """Convert the PDF at ``pdf_path`` and return the resulting markup string.

    For every page the function groups line/rectangle drawings by
    ``(stroke colour, stroke width)`` and collects the non-whitespace
    characters of each text span together with their integer origins. The
    accumulated markup is whitespace-minified, zlib-compressed (level 9)
    and base64-encoded.

    NOTE(review): the markup inside several string literals below (the ones
    that are empty or consist only of a newline) appears to have been
    stripped from the text under review. The locals ``w``, ``h``,
    ``stk_c``, ``sw``, ``d_str``, ``cl``, ``fz``, ``fw``, ``fs``, ``xs``,
    ``y_attr`` and ``z_data`` were presumably interpolated there. The
    literals are reproduced exactly as visible; restore them from the
    original file before relying on this output.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        The final string built from the encoded payload.

    Raises:
        Whatever ``fitz.open`` or the page extraction calls raise; the
        document is closed in every case.
    """
    doc = fitz.open(pdf_path)
    try:
        # Collect fragments and join once instead of repeated ``raw +=``.
        parts = ['\n']  # NOTE(review): opening markup appears stripped.
        for page in doc:
            w, h = int(page.rect.width), int(page.rect.height)
            parts.append(f'')  # NOTE(review): per-page opening markup appears stripped.

            # Merge all drawings sharing a stroke colour and width so each
            # combination is emitted once.
            path_groups = {}
            for path in page.get_drawings():
                c = get_color(path.get("color"))
                w_raw = path.get("width")
                wd = float(w_raw) if w_raw is not None else 1.0
                d_chunk = _path_data(path["items"])
                if d_chunk:
                    path_groups.setdefault((c, wd), []).append(d_chunk)

            for (stk_c, stk_w), d_chunks in path_groups.items():
                d_str = "".join(d_chunks)
                # The stroke-width attribute is omitted for the 1.0 default.
                sw = f' stroke-width="{stk_w:.1f}"' if stk_w != 1.0 else ""
                parts.append(f'')  # NOTE(review): path markup appears stripped.

            text_dict = page.get_text("rawdict")
            for block in text_dict["blocks"]:
                for line in block.get("lines", []):
                    for span in line.get("spans", []):
                        cl = get_color(span["color"])
                        fz = round(span["size"], 1)
                        fw = ' font-weight="bold"' if "Bold" in span["font"] else ""
                        fs = ' font-style="italic"' if "Italic" in span["font"] else ""
                        xs, ys, chars = [], [], []
                        for char in span.get("chars", []):
                            txt = html.escape(char["c"])
                            if not txt.strip():
                                # Whitespace characters are dropped.
                                continue
                            xs.append(str(int(char["origin"][0])))
                            ys.append(str(int(char["origin"][1])))
                            chars.append(txt)
                        if chars:
                            # A single y value suffices when all glyphs
                            # share the same baseline.
                            y_attr = ys[0] if len(set(ys)) == 1 else " ".join(ys)
                            # NOTE(review): text markup around the characters appears stripped.
                            parts.append(f'{"".join(chars)}')
            parts.append('')  # NOTE(review): per-page closing markup appears stripped.
        parts.append('\n')  # NOTE(review): closing markup appears stripped.
    finally:
        # Release the document even when extraction fails part-way.
        doc.close()

    raw = "".join(parts)
    raw_min = (
        re.sub(r'\s+', ' ', raw)
        .replace('> <', '><')
        .replace(': ', ':')
        .replace('; ', ';')
    )
    z_data = base64.b64encode(
        zlib.compress(raw_min.encode('utf-8'), level=9)
    ).decode('utf-8')
    return f'\n'  # NOTE(review): page template embedding z_data appears stripped.


def _convert_pdf_bytes(pdf_bytes):
    """Write ``pdf_bytes`` to a temporary ``.pdf``, convert it, then delete it.

    The temporary file is removed whether or not the conversion succeeds.
    """
    # delete=False so the file survives being closed and can be reopened by
    # path; removal is handled explicitly below.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    try:
        with tmp:
            tmp.write(pdf_bytes)
        return perform_conversion(tmp.name)
    finally:
        os.unlink(tmp.name)


@app.post("/convert-file")
async def convert_file(file: UploadFile = File(...)):
    """Convert an uploaded PDF and return the result as an HTML response.

    On failure the handler returns ``{"error": <message>}``.

    NOTE(review): that error payload is sent with the default 200 status,
    unlike ``/convert-url`` which answers 400. It is kept unchanged for
    existing clients; consider aligning the two endpoints.
    NOTE(review): the conversion is blocking work inside an ``async def``
    handler and will stall the event loop while it runs.
    """
    pdf_bytes = await file.read()
    try:
        result = _convert_pdf_bytes(pdf_bytes)
        return HTMLResponse(content=result)
    except Exception as e:
        return {"error": str(e)}


@app.get("/convert-url")
async def convert_url(url: str):
    """Download the PDF at ``url``, convert it and return an HTML response.

    Raises:
        HTTPException: status 400 carrying the underlying error message
            when the download or the conversion fails.

    NOTE(review): the server fetches an arbitrary caller-supplied URL
    (SSRF risk); restrict schemes/hosts if this is exposed publicly.
    NOTE(review): ``requests.get`` and the conversion are blocking calls
    inside an ``async def`` handler.
    """
    try:
        response = requests.get(url, timeout=_DOWNLOAD_TIMEOUT_SECONDS)
        # Fail with the HTTP error instead of feeding an error page to the
        # PDF parser.
        response.raise_for_status()
        result = _convert_pdf_bytes(response.content)
        return HTMLResponse(content=result)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)