Spaces:
Sleeping
Sleeping
| import fitz # PyMuPDF | |
| import html | |
| import zlib | |
| import base64 | |
| import re | |
| import requests | |
| from pathlib import Path | |
| from fastapi import FastAPI, UploadFile, File, HTTPException | |
| from fastapi.responses import HTMLResponse, FileResponse | |
| import tempfile | |
| import os | |
# ASGI application object; it is handed to uvicorn when this file is run as a script.
app = FastAPI()
def get_color(color_int):
    """Return a CSS/SVG hex color string for a PyMuPDF color value.

    Args:
        color_int: The color to convert. Accepted forms:
            * ``None`` - rendered as black;
            * an ``int`` packed as ``0xRRGGBB`` (the form passed in for
              text spans);
            * a tuple/list of floats in ``0..1`` with one component (gray)
              or three components (RGB) - the form PyMuPDF uses for the
              colors of vector drawings.

    Returns:
        ``"#rgb"`` when every channel is a multiple of 17 (so the short
        form is lossless), otherwise ``"#rrggbb"``.

    Raises:
        ValueError: If a sequence with an unsupported number of
            components is given.
    """
    if color_int is None:
        return "#000"

    if isinstance(color_int, (tuple, list)):
        if len(color_int) == 1:
            components = tuple(color_int) * 3  # gray: same level on all channels
        elif len(color_int) == 3:
            components = tuple(color_int)
        else:
            raise ValueError(
                f"unsupported color with {len(color_int)} components: {color_int!r}"
            )
        # Scale 0..1 floats to 0..255 and clamp slightly out-of-range values.
        r, g, b = (min(255, max(0, round(c * 255))) for c in components)
    else:
        r = (color_int >> 16) & 0xFF
        g = (color_int >> 8) & 0xFF
        b = color_int & 0xFF

    # 0x11 == 17: a channel that is a multiple of 17 has two identical hex
    # digits, so the three-digit shorthand represents it exactly.
    if r % 17 == 0 and g % 17 == 0 and b % 17 == 0:
        return f"#{r // 17:x}{g // 17:x}{b // 17:x}"
    return f"#{r:02x}{g:02x}{b:02x}"
def _drawings_to_svg(page):
    """Return SVG ``<path>`` markup for the line and rectangle drawings on *page*."""
    # Path data is grouped by (stroke color, stroke width) so that each
    # distinct style is emitted as a single <path> element.
    grouped = {}
    for drawing in page.get_drawings():
        stroke = get_color(drawing.get("color"))
        width = drawing.get("width")
        key = (stroke, float(width) if width is not None else 1.0)
        segments = []
        for item in drawing["items"]:
            kind = item[0]
            if kind == "l":
                start, end = item[1], item[2]
                segments.append(
                    f"M{int(start.x)} {int(start.y)}L{int(end.x)} {int(end.y)}"
                )
            elif kind == "re":
                rect = item[1]
                segments.append(
                    f"M{int(rect.x0)} {int(rect.y0)}H{int(rect.x1)}V{int(rect.y1)}H{int(rect.x0)}Z"
                )
            # Any other item kind is skipped.
        if segments:
            grouped.setdefault(key, []).append("".join(segments))

    elements = []
    for (stroke, width), chunks in grouped.items():
        d_attr = "".join(chunks)
        # A width of exactly 1.0 is left out of the markup.
        width_attr = f' stroke-width="{width:.1f}"' if width != 1.0 else ""
        elements.append(
            f'<path d="{d_attr}" stroke="{stroke}"{width_attr} fill="none" stroke-linecap="butt"/>'
        )
    return "".join(elements)


def _text_to_svg(page):
    """Return SVG ``<text>`` markup for the text spans on *page*, one element per span."""
    elements = []
    for block in page.get_text("rawdict")["blocks"]:
        for line in block.get("lines", []):
            for span in line.get("spans", []):
                fill = get_color(span["color"])
                size = round(span["size"], 1)
                font = span["font"]
                weight = ' font-weight="bold"' if "Bold" in font else ""
                slant = ' font-style="italic"' if "Italic" in font else ""

                xs, ys, glyphs = [], [], []
                for char in span.get("chars", []):
                    glyph = html.escape(char["c"])
                    if not glyph.strip():
                        # Whitespace needs no glyph: every remaining character
                        # is positioned by its own x (and y) coordinate.
                        continue
                    xs.append(str(int(char["origin"][0])))
                    ys.append(str(int(char["origin"][1])))
                    glyphs.append(glyph)
                if not glyphs:
                    continue

                x_attr = " ".join(xs)
                # A single y value is enough when all glyphs share a baseline.
                y_attr = ys[0] if len(set(ys)) == 1 else " ".join(ys)
                text = "".join(glyphs)
                elements.append(
                    f'<text x="{x_attr}" y="{y_attr}" fill="{fill}" font-size="{size}"{weight}{slant}>{text}</text>'
                )
    return "".join(elements)


def perform_conversion(pdf_path):
    """Convert a PDF file into a self-extracting HTML snippet.

    Every page becomes one inline ``<svg>`` holding its line/rectangle
    drawings and its text spans. The combined markup is whitespace-minified,
    zlib-compressed and base64-encoded, then wrapped in a small script that
    inflates it with ``DecompressionStream("deflate")`` and swaps it into
    the placeholder ``<div id="v">``.

    Args:
        pdf_path: Path of the PDF file, passed to ``fitz.open``.

    Returns:
        The HTML snippet (placeholder ``<div>`` plus ``<script>``) as a string.

    Raises:
        Whatever ``fitz.open`` or the page accessors raise; the document is
        closed before the exception propagates.
    """
    doc = fitz.open(pdf_path)
    try:
        parts = [
            '<style>.p{display:flex;flex-direction:column;align-items:center;width:100%}.c{background:var(--bg-solfa, #fff);width:100%;height:auto;display:block;font-family:serif}</style><div class="p">'
        ]
        for page in doc:
            w, h = int(page.rect.width), int(page.rect.height)
            parts.append(
                f'<svg class="c" viewBox="0 0 {w} {h}" xmlns="http://www.w3.org/2000/svg">'
            )
            parts.append(_drawings_to_svg(page))
            parts.append(_text_to_svg(page))
            parts.append('</svg>')
        parts.append('</div>')
    finally:
        # Release the document even when a page fails to convert.
        doc.close()

    raw = "".join(parts)
    raw_min = re.sub(r'\s+', ' ', raw).replace('> <', '><').replace(': ', ':').replace('; ', ';')
    z_data = base64.b64encode(zlib.compress(raw_min.encode('utf-8'), level=9)).decode('utf-8')
    return f'<div id="v"></div><script>(async()=>{{const b="{z_data}",s=Uint8Array.from(atob(b),c=>c.charCodeAt(0)),r=new Response(new Blob([s]).stream().pipeThrough(new DecompressionStream("deflate")));document.getElementById("v").outerHTML=await r.text()}})();</script>'
async def convert_file(file: UploadFile = File(...)):
    """Convert an uploaded PDF and return the generated HTML snippet.

    The upload is written to a temporary ``.pdf`` file, converted with
    ``perform_conversion`` and the temporary file is removed afterwards,
    whether or not the conversion succeeded.

    Args:
        file: The uploaded PDF.

    Returns:
        An ``HTMLResponse`` with the snippet on success, or a dict
        ``{"error": <message>}`` when the conversion fails (kept as-is so
        existing clients that look for the ``error`` key keep working).
    """
    # NOTE(review): no route decorator is visible on this function in this
    # view of the file - confirm it is registered on `app` somewhere.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    tmp_path = tmp.name
    try:
        with tmp:
            tmp.write(await file.read())
        try:
            result = perform_conversion(tmp_path)
            return HTMLResponse(content=result)
        except Exception as e:
            return {"error": str(e)}
    finally:
        # delete=False means nothing else removes the file; clean it up on
        # every path so failed conversions do not pile up temp files.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass  # best-effort cleanup; never mask the real outcome
async def convert_url(url: str):
    """Download a PDF from *url*, convert it and return the HTML snippet.

    Args:
        url: Address of the PDF to fetch.

    Returns:
        An ``HTMLResponse`` containing the snippet built by
        ``perform_conversion``.

    Raises:
        HTTPException: With status 400 and the underlying error message when
            the download (including a non-success HTTP status or a timeout)
            or the conversion fails.
    """
    # NOTE(security): `url` is caller-supplied and fetched from the server, so
    # this can reach internal hosts (SSRF). Restrict the allowed targets if
    # this is exposed to untrusted clients.
    # NOTE(review): requests.get is a blocking call inside an async function.
    download_timeout = 30  # seconds; without a timeout the request may never return
    tmp_path = None
    try:
        response = requests.get(url, timeout=download_timeout)
        # Fail early on 4xx/5xx instead of handing an error page to the parser.
        response.raise_for_status()
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp_path = tmp.name
            tmp.write(response.content)
        result = perform_conversion(tmp_path)
        return HTMLResponse(content=result)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    finally:
        # Remove the temp file on every path, not only after a success.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass  # best-effort cleanup; never mask the real outcome
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)