JairoDanielMT committed on
Commit
e0b5e77
1 Parent(s): 7710c1b

texto seleccionable

Browse files
Files changed (3) hide show
  1. Dockerfile +7 -10
  2. app/main.py +18 -10
  3. app/renderer.py +6 -36
Dockerfile CHANGED
@@ -1,30 +1,27 @@
1
- # Imagen base con Debian bullseye (tiene wkhtmltopdf en apt)
2
  FROM python:3.11-bullseye
3
 
4
- # Evitar prompts interactivos en apt
5
  ENV DEBIAN_FRONTEND=noninteractive
6
 
7
  WORKDIR /app
8
 
9
- # Instalar dependencias de sistema necesarias para wkhtmltopdf
10
  RUN apt-get update && \
11
  apt-get install -y --no-install-recommends \
12
- wkhtmltopdf \
13
- fonts-dejavu-core \
 
 
 
 
14
  && rm -rf /var/lib/apt/lists/*
15
 
16
- # Copiar requirements e instalar dependencias de Python
17
  COPY requirements.txt .
18
  RUN pip install --no-cache-dir -r requirements.txt
19
 
20
- # Copiar el código de la app
21
  COPY app/ app/
22
 
23
- # Variables de entorno útiles
24
  ENV PYTHONUNBUFFERED=1
25
 
26
- # Hugging Face define $PORT en runtime, pero exponemos 7860 por convención
27
  EXPOSE 7860
28
 
29
- # Comando de arranque
30
  CMD ["sh", "-c", "uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-7860}"]
 
 
1
  FROM python:3.11-bullseye
2
 
 
3
  ENV DEBIAN_FRONTEND=noninteractive
4
 
5
  WORKDIR /app
6
 
7
+ # Dependencias de sistema para WeasyPrint (Cairo, Pango, etc.)
8
  RUN apt-get update && \
9
  apt-get install -y --no-install-recommends \
10
+ libpango-1.0-0 \
11
+ libpangocairo-1.0-0 \
12
+ libcairo2 \
13
+ libgdk-pixbuf2.0-0 \
14
+ libffi-dev \
15
+ shared-mime-info \
16
  && rm -rf /var/lib/apt/lists/*
17
 
 
18
  COPY requirements.txt .
19
  RUN pip install --no-cache-dir -r requirements.txt
20
 
 
21
  COPY app/ app/
22
 
 
23
  ENV PYTHONUNBUFFERED=1
24
 
 
25
  EXPOSE 7860
26
 
 
27
  CMD ["sh", "-c", "uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-7860}"]
app/main.py CHANGED
@@ -22,11 +22,17 @@ app = FastAPI(
22
  class CompileRequest(BaseModel):
23
  doc: Dict[str, Any] = Field(
24
  ...,
25
- description="JSON de documentación generado por la IA (incluyendo metadata.script_type).",
 
 
 
26
  )
27
  job_id: Optional[str] = Field(
28
  None,
29
- description="Identificador opcional para el archivo (por ejemplo, nombre del script o job).",
 
 
 
30
  )
31
  output: Literal["html", "pdf"] = Field(
32
  "pdf",
@@ -42,9 +48,7 @@ def root():
42
 
43
  @app.get("/health")
44
  def health_check():
45
- """
46
- Endpoint simple de healthcheck para monitoreo.
47
- """
48
  return {"status": "ok"}
49
 
50
 
@@ -59,7 +63,10 @@ def compile_html(req: CompileRequest):
59
  try:
60
  html = render_html(req.doc)
61
  except Exception as exc:
62
- raise HTTPException(status_code=500, detail=f"Error al renderizar HTML: {exc}")
 
 
 
63
 
64
  return HTMLResponse(content=html)
65
 
@@ -67,7 +74,7 @@ def compile_html(req: CompileRequest):
67
  @app.post("/compile/pdf")
68
  def compile_pdf(req: CompileRequest):
69
  """
70
- Recibe JSON y devuelve el PDF compilado usando wkhtmltopdf.
71
  """
72
  if not req.doc:
73
  raise HTTPException(status_code=400, detail="Campo 'doc' es obligatorio")
@@ -76,10 +83,11 @@ def compile_pdf(req: CompileRequest):
76
 
77
  try:
78
  pdf_bytes = render_pdf_bytes(req.doc)
79
- except RuntimeError as exc:
80
- raise HTTPException(status_code=500, detail=str(exc))
81
  except Exception as exc:
82
- raise HTTPException(status_code=500, detail=f"Error al generar PDF: {exc}")
 
 
 
83
 
84
  filename = f"{job_id}.pdf"
85
 
 
22
  class CompileRequest(BaseModel):
23
  doc: Dict[str, Any] = Field(
24
  ...,
25
+ description=(
26
+ "JSON de documentación generado por la IA "
27
+ "(incluyendo metadata.script_type)."
28
+ ),
29
  )
30
  job_id: Optional[str] = Field(
31
  None,
32
+ description=(
33
+ "Identificador opcional para el archivo "
34
+ "(por ejemplo, nombre del script o job)."
35
+ ),
36
  )
37
  output: Literal["html", "pdf"] = Field(
38
  "pdf",
 
48
 
49
  @app.get("/health")
50
  def health_check():
51
+ """Endpoint simple de healthcheck para monitoreo."""
 
 
52
  return {"status": "ok"}
53
 
54
 
 
63
  try:
64
  html = render_html(req.doc)
65
  except Exception as exc:
66
+ raise HTTPException(
67
+ status_code=500,
68
+ detail=f"Error al renderizar HTML: {exc}",
69
+ )
70
 
71
  return HTMLResponse(content=html)
72
 
 
74
  @app.post("/compile/pdf")
75
  def compile_pdf(req: CompileRequest):
76
  """
77
+ Recibe JSON y devuelve el PDF compilado (WeasyPrint).
78
  """
79
  if not req.doc:
80
  raise HTTPException(status_code=400, detail="Campo 'doc' es obligatorio")
 
83
 
84
  try:
85
  pdf_bytes = render_pdf_bytes(req.doc)
 
 
86
  except Exception as exc:
87
+ raise HTTPException(
88
+ status_code=500,
89
+ detail=f"Error al generar PDF: {exc}",
90
+ )
91
 
92
  filename = f"{job_id}.pdf"
93
 
app/renderer.py CHANGED
@@ -3,15 +3,13 @@ from __future__ import annotations
3
  from datetime import datetime
4
  from typing import Any, Dict
5
 
6
- import pdfkit
7
  from jinja2 import Environment, FileSystemLoader, select_autoescape
 
8
 
9
- from .config import BASE_DIR, get_pdfkit_config
10
 
11
- # Carpeta de plantillas Jinja2
12
  TEMPLATES_DIR = BASE_DIR / "templates"
13
 
14
- # Mapeo script_type -> plantilla
15
  TEMPLATE_MAP: Dict[str, str] = {
16
  "RESTlet": "RESTlet.html.j2",
17
  "ClientScript": "ClientScript.html.j2",
@@ -26,7 +24,6 @@ TEMPLATE_MAP: Dict[str, str] = {
26
  "InternalModule": "InternalModule.html.j2",
27
  }
28
 
29
- # Entorno Jinja
30
  env = Environment(
31
  loader=FileSystemLoader(str(TEMPLATES_DIR)),
32
  autoescape=select_autoescape(["html", "xml"]),
@@ -36,10 +33,6 @@ env = Environment(
36
 
37
 
38
  def _resolve_template(doc: Dict[str, Any]) -> str:
39
- """
40
- A partir del JSON determina qué plantilla Jinja usar
41
- leyendo metadata.script_type o metadata.type.
42
- """
43
  metadata = doc.get("metadata") or {}
44
  script_type = (
45
  metadata.get("script_type") or metadata.get("type") or "InternalModule"
@@ -48,12 +41,8 @@ def _resolve_template(doc: Dict[str, Any]) -> str:
48
 
49
 
50
  def render_html(doc: Dict[str, Any]) -> str:
51
- """
52
- Renderiza el HTML final a partir del JSON y la plantilla Jinja2 apropiada.
53
- """
54
  template_name = _resolve_template(doc)
55
  template = env.get_template(template_name)
56
-
57
  html = template.render(
58
  doc=doc,
59
  generated_at=datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC"),
@@ -61,32 +50,13 @@ def render_html(doc: Dict[str, Any]) -> str:
61
  return html
62
 
63
 
64
- def _wkhtml_options() -> Dict[str, str]:
65
- return {
66
- "page-size": "A4",
67
- "encoding": "UTF-8",
68
- "margin-top": "25.4mm",
69
- "margin-right": "25.4mm",
70
- "margin-bottom": "25.4mm",
71
- "margin-left": "25.4mm",
72
- }
73
-
74
-
75
  def render_pdf_bytes(doc: Dict[str, Any]) -> bytes:
76
  """
77
- Toma el JSON, renderiza el HTML y lo compila a PDF (bytes en memoria).
78
- Lanza RuntimeError si wkhtmltopdf no está disponible.
79
  """
80
  html = render_html(doc)
81
- config = get_pdfkit_config()
82
 
83
- if config is None:
84
- raise RuntimeError("wkhtmltopdf no disponible en el servicio doc-compiler.")
85
-
86
- pdf_bytes = pdfkit.from_string(
87
- html,
88
- False, # False => devuelve bytes en vez de escribir archivo
89
- options=_wkhtml_options(),
90
- configuration=config,
91
- )
92
  return pdf_bytes
 
3
  from datetime import datetime
4
  from typing import Any, Dict
5
 
 
6
  from jinja2 import Environment, FileSystemLoader, select_autoescape
7
+ from weasyprint import HTML
8
 
9
+ from .config import BASE_DIR # si lo usas para rutas
10
 
 
11
  TEMPLATES_DIR = BASE_DIR / "templates"
12
 
 
13
  TEMPLATE_MAP: Dict[str, str] = {
14
  "RESTlet": "RESTlet.html.j2",
15
  "ClientScript": "ClientScript.html.j2",
 
24
  "InternalModule": "InternalModule.html.j2",
25
  }
26
 
 
27
  env = Environment(
28
  loader=FileSystemLoader(str(TEMPLATES_DIR)),
29
  autoescape=select_autoescape(["html", "xml"]),
 
33
 
34
 
35
  def _resolve_template(doc: Dict[str, Any]) -> str:
 
 
 
 
36
  metadata = doc.get("metadata") or {}
37
  script_type = (
38
  metadata.get("script_type") or metadata.get("type") or "InternalModule"
 
41
 
42
 
43
  def render_html(doc: Dict[str, Any]) -> str:
 
 
 
44
  template_name = _resolve_template(doc)
45
  template = env.get_template(template_name)
 
46
  html = template.render(
47
  doc=doc,
48
  generated_at=datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC"),
 
50
  return html
51
 
52
 
 
 
 
 
 
 
 
 
 
 
 
53
  def render_pdf_bytes(doc: Dict[str, Any]) -> bytes:
54
  """
55
+ Genera un PDF vectorial a partir del HTML usando WeasyPrint.
56
+ El texto es seleccionable.
57
  """
58
  html = render_html(doc)
 
59
 
60
+ # base_url permite que WeasyPrint resuelva rutas relativas (si tuvieras CSS/imágenes locales)
61
+ pdf_bytes: bytes = HTML(string=html, base_url=str(TEMPLATES_DIR)).write_pdf()
 
 
 
 
 
 
 
62
  return pdf_bytes