solfa-worker / app.py
h-rand's picture
Update app.py
6820f34 verified
import fitz # PyMuPDF
import html
import zlib
import base64
import re
import requests
from pathlib import Path
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import HTMLResponse, FileResponse
import tempfile
import os
# ASGI application object; the route decorators further down register their
# handlers on it, and the __main__ guard hands it to uvicorn.
app = FastAPI()
def get_color(color_int):
    """Return a CSS hex colour string for a PyMuPDF colour value.

    Two encodings are accepted because this file feeds both into the
    function: text spans pass a packed ``0xRRGGBB`` integer, while
    ``page.get_drawings()`` reports stroke colours as a tuple of floats in
    the 0..1 range (previously this raised ``TypeError`` on ``>>``).

    Args:
        color_int: ``None``, a packed ``0xRRGGBB`` integer, or a tuple/list of
            1 (gray), 3 (RGB) or 4 (CMYK) floats in the 0..1 range.

    Returns:
        ``"#000"`` for ``None``; otherwise ``"#rgb"`` when every channel is a
        multiple of 17 (i.e. both hex digits are equal), else ``"#rrggbb"``.

    Raises:
        ValueError: If a colour sequence has an unsupported number of
            components.
    """
    if color_int is None:
        return "#000"

    if isinstance(color_int, (tuple, list)):
        # Clamp first so slightly out-of-range floats cannot overflow a byte.
        channels = [min(max(float(v), 0.0), 1.0) for v in color_int]
        if len(channels) == 1:
            channels = channels * 3  # gray -> identical R, G and B
        elif len(channels) == 4:
            # Naive CMYK -> RGB conversion (no colour profile involved).
            c, m, y, k = channels
            channels = [(1 - c) * (1 - k), (1 - m) * (1 - k), (1 - y) * (1 - k)]
        elif len(channels) != 3:
            raise ValueError(
                f"unsupported colour with {len(channels)} components: {color_int!r}"
            )
        # +0.5 then truncate == round-half-up, avoiding banker's rounding.
        r, g, b = (int(v * 255 + 0.5) for v in channels)
    else:
        r = (color_int >> 16) & 0xFF
        g = (color_int >> 8) & 0xFF
        b = color_int & 0xFF

    # 0x00, 0x11, ... 0xff are exactly the multiples of 17, so such channels
    # can be written with a single hex digit.
    if r % 17 == 0 and g % 17 == 0 and b % 17 == 0:
        return f"#{r // 17:x}{g // 17:x}{b // 17:x}"
    return f"#{r:02x}{g:02x}{b:02x}"
def _svg_paths(page):
    """Return one ``<path>`` element per (stroke colour, width) group of *page*."""
    grouped = {}
    for path in page.get_drawings():
        stroke = get_color(path.get("color"))
        w_raw = path.get("width")
        width = float(w_raw) if w_raw is not None else 1.0

        segments = []
        for item in path["items"]:
            kind = item[0]
            if kind == "l":  # straight line: (kind, start point, end point)
                start, end = item[1], item[2]
                segments.append(
                    f"M{int(start.x)} {int(start.y)}L{int(end.x)} {int(end.y)}"
                )
            elif kind == "re":  # rectangle: (kind, rect, ...)
                r = item[1]
                segments.append(
                    f"M{int(r.x0)} {int(r.y0)}H{int(r.x1)}V{int(r.y1)}H{int(r.x0)}Z"
                )
            # Any other item kind is not rendered.

        # Only register a group once it has something to draw, so groups keep
        # the order in which their first segment appeared.
        if segments:
            grouped.setdefault((stroke, width), []).extend(segments)

    elements = []
    for (stroke, width), segments in grouped.items():
        d_attr = "".join(segments)
        # The attribute is omitted when the width is exactly 1.0.
        sw = f' stroke-width="{width:.1f}"' if width != 1.0 else ""
        elements.append(
            f'<path d="{d_attr}" stroke="{stroke}"{sw} fill="none" stroke-linecap="butt"/>'
        )
    return elements


def _svg_texts(page):
    """Return one ``<text>`` element per non-blank text span of *page*."""
    elements = []
    for block in page.get_text("rawdict")["blocks"]:
        for line in block.get("lines", []):
            for span in line.get("spans", []):
                fill = get_color(span["color"])
                size = round(span["size"], 1)
                weight = ' font-weight="bold"' if "Bold" in span["font"] else ""
                style = ' font-style="italic"' if "Italic" in span["font"] else ""

                xs, ys, glyphs = [], [], []
                for char in span.get("chars", []):
                    glyph = html.escape(char["c"])
                    # Whitespace is dropped: every glyph is placed by its own
                    # coordinate, so spaces carry no information.
                    if not glyph.strip():
                        continue
                    xs.append(str(int(char["origin"][0])))
                    ys.append(str(int(char["origin"][1])))
                    glyphs.append(glyph)

                if not glyphs:
                    continue
                x_attr = " ".join(xs)
                # A single shared baseline is emitted once instead of per glyph.
                y_attr = ys[0] if len(set(ys)) == 1 else " ".join(ys)
                content = "".join(glyphs)
                elements.append(
                    f'<text x="{x_attr}" y="{y_attr}" fill="{fill}" font-size="{size}"{weight}{style}>{content}</text>'
                )
    return elements


def perform_conversion(pdf_path):
    """Convert a PDF into a self-inflating HTML snippet of per-page SVGs.

    Every page becomes one ``<svg>`` containing its straight-line and
    rectangle drawings (merged per stroke colour and width) followed by its
    text, one ``<text>`` element per span with per-character coordinates.
    The markup is whitespace-minified, zlib-compressed, base64-encoded and
    embedded in a script that inflates it in the browser with
    ``DecompressionStream("deflate")`` and swaps it in for a placeholder div.

    Args:
        pdf_path: Path of the PDF file, passed unchanged to ``fitz.open``.

    Returns:
        The HTML fragment (placeholder ``<div id="v">`` plus inline script).

    Raises:
        Whatever ``fitz`` or ``get_color`` raise for unreadable input; the
        document is closed in every case.
    """
    # Collect fragments and join once instead of growing a string with +=.
    parts = [
        '<style>.p{display:flex;flex-direction:column;align-items:center;width:100%}.c{background:var(--bg-solfa, #fff);width:100%;height:auto;display:block;font-family:serif}</style><div class="p">'
    ]

    doc = fitz.open(pdf_path)
    try:
        for page in doc:
            w, h = int(page.rect.width), int(page.rect.height)
            parts.append(
                f'<svg class="c" viewBox="0 0 {w} {h}" xmlns="http://www.w3.org/2000/svg">'
            )
            parts.extend(_svg_paths(page))
            parts.extend(_svg_texts(page))
            parts.append('</svg>')
    finally:
        # Release the document even when a page fails to convert.
        doc.close()

    parts.append('</div>')
    raw = "".join(parts)

    # Collapse whitespace runs and strip the spaces that minification leaves
    # between tags and after ':' / ';'.
    raw_min = re.sub(r'\s+', ' ', raw).replace('> <', '><').replace(': ', ':').replace('; ', ';')
    z_data = base64.b64encode(zlib.compress(raw_min.encode('utf-8'), level=9)).decode('utf-8')
    return f'<div id="v"></div><script>(async()=>{{const b="{z_data}",s=Uint8Array.from(atob(b),c=>c.charCodeAt(0)),r=new Response(new Blob([s]).stream().pipeThrough(new DecompressionStream("deflate")));document.getElementById("v").outerHTML=await r.text()}})();</script>'
@app.post("/convert-file")
async def convert_file(file: UploadFile = File(...)):
    """Convert an uploaded PDF and return the resulting HTML fragment.

    The upload is spooled to a temporary ``.pdf`` file because
    ``perform_conversion`` works on a path. The temporary file is removed
    whether or not the conversion succeeds.

    Args:
        file: The uploaded PDF (multipart form field ``file``).

    Returns:
        An ``HTMLResponse`` with the converted markup, or a JSON object
        ``{"error": <message>}`` when the conversion raises.
    """
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            # Record the path first so a failed write is still cleaned up.
            tmp_path = tmp.name
            tmp.write(await file.read())

        try:
            result = perform_conversion(tmp_path)
        except Exception as e:
            # NOTE(review): failures are reported with HTTP 200 and an "error"
            # key, unlike /convert-url which answers 400. Kept as-is so
            # existing clients keep working; consider aligning the two.
            return {"error": str(e)}
        return HTMLResponse(content=result)
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not replace
                # the response that was already decided above.
                pass
@app.get("/convert-url")
async def convert_url(url: str):
    """Download a PDF from *url*, convert it and return the HTML fragment.

    Args:
        url: Address of the PDF to fetch (query parameter).

    Returns:
        An ``HTMLResponse`` with the converted markup.

    Raises:
        HTTPException: Status 400 with the underlying error message when the
            download fails, the server answers with an error status, or the
            conversion raises.
    """
    tmp_path = None
    try:
        # NOTE(review): ``url`` is caller-supplied, so this endpoint can be
        # used to make the server fetch internal addresses (SSRF). Restrict
        # the allowed hosts if the service is reachable by untrusted clients.
        # The timeout keeps an unresponsive host from blocking the request
        # indefinitely.
        response = requests.get(url, timeout=30)
        # Fail early on 4xx/5xx instead of feeding an error page to the parser.
        response.raise_for_status()

        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            # Record the path first so a failed write is still cleaned up.
            tmp_path = tmp.name
            tmp.write(response.content)

        result = perform_conversion(tmp_path)
        return HTMLResponse(content=result)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup: never let it mask the real outcome.
                pass
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces,
    # port 7860. uvicorn is imported here so it is only needed when the
    # module is run directly.
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )