Spaces:

h-rand
/

solfa-worker

Sleeping

App Files Files Community

solfa-worker / app.py

h-rand

Update app.py

6820f34 verified about 2 months ago

raw

history blame contribute delete

4.53 kB

	import fitz # PyMuPDF
	import html
	import zlib
	import base64
	import re
	import requests
	from pathlib import Path
	from fastapi import FastAPI, UploadFile, File, HTTPException
	from fastapi.responses import HTMLResponse, FileResponse
	import tempfile
	import os

	# Application object; the @app.post / @app.get decorators further down
	# register the conversion endpoints on it, and the __main__ guard serves it.
	app = FastAPI()

	def get_color(color_int):
	    """Return a compact CSS hex colour string for a colour value.

	    Args:
	        color_int: One of
	            * ``None`` -- treated as black;
	            * a packed ``0xRRGGBB`` integer;
	            * a tuple/list of floats in the range 0..1, holding either one
	              component (gray) or three components (red, green, blue).

	    Returns:
	        ``"#rgb"`` when every channel can be written as a single repeated
	        hex digit (i.e. is a multiple of 17), otherwise ``"#rrggbb"``.

	    Raises:
	        ValueError: If a sequence with an unsupported number of components
	            is given.
	    """
	    if color_int is None:
	        return "#000"

	    if isinstance(color_int, (tuple, list)):
	        # Float components: scale to 0..255 and clamp out-of-range values.
	        channels = [min(255, max(0, round(float(c) * 255))) for c in color_int]
	        if len(channels) == 1:
	            channels *= 3  # gray -> identical red, green and blue
	        if len(channels) != 3:
	            raise ValueError(
	                f"unsupported colour with {len(color_int)} components: {color_int!r}"
	            )
	        r, g, b = channels
	    else:
	        r = (color_int >> 16) & 0xFF
	        g = (color_int >> 8) & 0xFF
	        b = color_int & 0xFF

	    # 0x00, 0x11, ..., 0xff are exactly the multiples of 17, so such a
	    # channel can be shortened to one hex digit.
	    if r % 17 == 0 and g % 17 == 0 and b % 17 == 0:
	        return f"#{r // 17:x}{g // 17:x}{b // 17:x}"
	    return f"#{r:02x}{g:02x}{b:02x}"

	def _svg_paths(page):
	    """Yield one SVG ``<path>`` element per (stroke colour, width) group on ``page``."""
	    grouped = {}
	    for drawing in page.get_drawings():
	        # NOTE(review): PyMuPDF documents drawing colours as float tuples
	        # rather than packed ints -- confirm get_color accepts that form.
	        stroke = get_color(drawing.get("color"))
	        width = drawing.get("width")
	        key = (stroke, 1.0 if width is None else float(width))

	        segments = []
	        for item in drawing["items"]:
	            kind = item[0]
	            if kind == "l":
	                start, end = item[1], item[2]
	                segments.append(
	                    f"M{int(start.x)} {int(start.y)}L{int(end.x)} {int(end.y)}"
	                )
	            elif kind == "re":
	                rect = item[1]
	                segments.append(
	                    f"M{int(rect.x0)} {int(rect.y0)}H{int(rect.x1)}"
	                    f"V{int(rect.y1)}H{int(rect.x0)}Z"
	                )
	            # Any other item kind is skipped.
	        if segments:
	            grouped.setdefault(key, []).extend(segments)

	    # Dict order is insertion order, so groups come out in first-seen order.
	    for (stroke, width), segments in grouped.items():
	        width_attr = f' stroke-width="{width:.1f}"' if width != 1.0 else ""
	        d_attr = "".join(segments)
	        yield f'<path d="{d_attr}" stroke="{stroke}"{width_attr} fill="none" stroke-linecap="butt"/>'


	def _svg_text(page):
	    """Yield one SVG ``<text>`` element per span of ``page`` that has visible characters."""
	    for block in page.get_text("rawdict")["blocks"]:
	        for line in block.get("lines", []):
	            for span in line.get("spans", []):
	                xs, ys, glyphs = [], [], []
	                for char in span.get("chars", []):
	                    if not char["c"].strip():
	                        continue  # whitespace is dropped; glyphs are placed individually
	                    xs.append(str(int(char["origin"][0])))
	                    ys.append(str(int(char["origin"][1])))
	                    glyphs.append(html.escape(char["c"]))
	                if not glyphs:
	                    continue

	                fill = get_color(span["color"])
	                size = round(span["size"], 1)
	                weight = ' font-weight="bold"' if "Bold" in span["font"] else ""
	                style = ' font-style="italic"' if "Italic" in span["font"] else ""
	                # A single y value is enough when every glyph shares a baseline.
	                y_attr = ys[0] if len(set(ys)) == 1 else " ".join(ys)
	                x_attr = " ".join(xs)
	                text = "".join(glyphs)
	                yield f'<text x="{x_attr}" y="{y_attr}" fill="{fill}" font-size="{size}"{weight}{style}>{text}</text>'


	def perform_conversion(pdf_path):
	    """Convert a PDF file into a self-inflating HTML snippet of inline SVG pages.

	    Each page becomes one ``<svg>`` element containing:

	    * stroked paths built from the page's line (``"l"``) and rectangle
	      (``"re"``) drawing items, merged per (stroke colour, stroke width);
	      paths are always emitted with ``fill="none"``;
	    * one ``<text>`` element per text span, with every non-whitespace
	      character positioned at its integer origin.

	    The markup is whitespace-minified, zlib-compressed and base64-encoded,
	    then embedded in a small script that decodes it with
	    ``DecompressionStream("deflate")`` and replaces the ``<div id="v">``
	    placeholder with the result.

	    Args:
	        pdf_path: Path of the PDF file, passed to ``fitz.open``.

	    Returns:
	        The HTML snippet (placeholder ``<div>`` plus ``<script>``) as ``str``.

	    Raises:
	        Whatever ``fitz`` raises while opening or reading the document; the
	        document is closed in that case as well.
	    """
	    parts = [
	        '<style>.p{display:flex;flex-direction:column;align-items:center;width:100%}.c{background:var(--bg-solfa, #fff);width:100%;height:auto;display:block;font-family:serif}</style><div class="p">'
	    ]

	    doc = fitz.open(pdf_path)
	    try:
	        for page in doc:
	            width, height = int(page.rect.width), int(page.rect.height)
	            parts.append(
	                f'<svg class="c" viewBox="0 0 {width} {height}" xmlns="http://www.w3.org/2000/svg">'
	            )
	            parts.extend(_svg_paths(page))
	            parts.extend(_svg_text(page))
	            parts.append('</svg>')
	    finally:
	        # Release the document even when a page fails to convert.
	        doc.close()
	    parts.append('</div>')

	    raw = "".join(parts)
	    raw_min = re.sub(r'\s+', ' ', raw).replace('> <', '><').replace(': ', ':').replace('; ', ';')
	    # zlib.compress emits the zlib container, which the browser's "deflate"
	    # DecompressionStream format consumes.
	    z_data = base64.b64encode(zlib.compress(raw_min.encode('utf-8'), level=9)).decode('utf-8')

	    return f'<div id="v"></div><script>(async()=>{{const b="{z_data}",s=Uint8Array.from(atob(b),c=>c.charCodeAt(0)),r=new Response(new Blob([s]).stream().pipeThrough(new DecompressionStream("deflate")));document.getElementById("v").outerHTML=await r.text()}})();</script>'

	@app.post("/convert-file")
	async def convert_file(file: UploadFile = File(...)):
	    """Convert an uploaded PDF and return the generated HTML snippet.

	    The upload is written to a temporary ``.pdf`` file, handed to
	    ``perform_conversion`` and the temporary file is removed afterwards,
	    whether or not the conversion succeeded.

	    Args:
	        file: The uploaded PDF (multipart form field ``file``).

	    Returns:
	        An ``HTMLResponse`` with the converted document, or a dict
	        ``{"error": <message>}`` when the conversion raises.
	    """
	    payload = await file.read()
	    # delete=False: the file must outlive the ``with`` block so that
	    # perform_conversion can reopen it by path.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
	        tmp.write(payload)
	        tmp_path = tmp.name

	    try:
	        result = perform_conversion(tmp_path)
	    except Exception as e:
	        # NOTE(review): unlike /convert-url, which raises HTTP 400, failures
	        # here are reported in a regular response body. Kept as-is so that
	        # existing clients checking for the "error" key keep working.
	        return {"error": str(e)}
	    finally:
	        # Always remove the temp file, not only after a successful conversion.
	        os.unlink(tmp_path)
	    return HTMLResponse(content=result)

	@app.get("/convert-url")
	async def convert_url(url: str):
	    """Download a PDF from ``url``, convert it and return the HTML snippet.

	    Args:
	        url: Address of the PDF to fetch (query parameter).

	    Returns:
	        An ``HTMLResponse`` with the converted document.

	    Raises:
	        HTTPException: Status 400, with the underlying error message as
	            detail, when the download or the conversion fails.
	    """
	    # NOTE(security): ``url`` is caller-controlled and fetched from the
	    # server, so this endpoint can be pointed at internal addresses (SSRF).
	    # Restrict the allowed hosts if the service is reachable by untrusted
	    # clients.
	    fetch_timeout_s = 30  # without a timeout requests may wait forever
	    tmp_path = None
	    try:
	        response = requests.get(url, timeout=fetch_timeout_s)
	        # Fail on 4xx/5xx instead of saving an error page as a "PDF".
	        response.raise_for_status()

	        # delete=False: the file must outlive the ``with`` block so that
	        # perform_conversion can reopen it by path.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
	            tmp_path = tmp.name
	            tmp.write(response.content)

	        result = perform_conversion(tmp_path)
	        return HTMLResponse(content=result)
	    except Exception as e:
	        raise HTTPException(status_code=400, detail=str(e)) from e
	    finally:
	        # Remove the temp file on both the success and the failure path.
	        if tmp_path is not None:
	            os.unlink(tmp_path)

	if __name__ == "__main__":
	    # Script entry point: serve the app with uvicorn on every interface,
	    # port 7860. uvicorn is imported here so it is only needed when the
	    # module is executed directly.
	    import uvicorn

	    listen = {"host": "0.0.0.0", "port": 7860}
	    uvicorn.run(app, **listen)