Spaces:
Sleeping
Sleeping
Musharraf committed on
Commit ·
bc262f3
1
Parent(s): 065642b
Restructure project for multi-mode deployment: add uv.lock, openenv-core dependency, server entry point, flat app structure
Browse files- Dockerfile +9 -10
- __init__.py +3 -0
- invoice_extraction_env/__init__.py +0 -17
- openenv.yaml +1 -1
- pyproject.toml +5 -2
- requirements.txt +1 -0
- {invoice_extraction_env/server → server}/__init__.py +0 -0
- {invoice_extraction_env/server → server}/app.py +7 -1
- {invoice_extraction_env/server → server}/documents.py +0 -0
- {invoice_extraction_env/server → server}/environment.py +3 -4
- {invoice_extraction_env/server → server}/graders.py +0 -0
- {invoice_extraction_env → server}/models.py +0 -0
- uv.lock +0 -0
Dockerfile
CHANGED
|
@@ -1,22 +1,21 @@
|
|
| 1 |
FROM python:3.10-slim
|
| 2 |
|
|
|
|
|
|
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
-
# Install dependencies first for
|
| 6 |
-
COPY requirements.txt .
|
| 7 |
-
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
# Copy
|
| 10 |
COPY . .
|
| 11 |
|
| 12 |
-
# Set Python path so the package is importable
|
| 13 |
-
ENV PYTHONPATH="/app"
|
| 14 |
-
|
| 15 |
EXPOSE 7860
|
| 16 |
|
| 17 |
-
# Health check
|
| 18 |
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 19 |
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/health')" || exit 1
|
| 20 |
|
| 21 |
-
|
| 22 |
-
CMD ["uvicorn", "invoice_extraction_env.server.app:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
| 1 |
FROM python:3.10-slim
|
| 2 |
|
| 3 |
+
RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*
|
| 4 |
+
|
| 5 |
WORKDIR /app
|
| 6 |
|
| 7 |
+
# Install dependencies first for caching
|
| 8 |
+
COPY requirements.txt pyproject.toml ./
|
| 9 |
+
COPY server/ server/
|
| 10 |
+
COPY __init__.py ./
|
| 11 |
+
RUN pip install --no-cache-dir .
|
| 12 |
|
| 13 |
+
# Copy remaining files
|
| 14 |
COPY . .
|
| 15 |
|
|
|
|
|
|
|
|
|
|
| 16 |
EXPOSE 7860
|
| 17 |
|
|
|
|
| 18 |
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 19 |
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/health')" || exit 1
|
| 20 |
|
| 21 |
+
CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Invoice Extraction Environment — OpenEnv environment for document data extraction."""
|
| 2 |
+
|
| 3 |
+
__version__ = "0.1.0"
|
invoice_extraction_env/__init__.py
DELETED
|
@@ -1,17 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Invoice Extraction Environment — An OpenEnv environment for structured
|
| 3 |
-
data extraction from unstructured invoice and receipt documents.
|
| 4 |
-
|
| 5 |
-
Example:
|
| 6 |
-
>>> import requests
|
| 7 |
-
>>> r = requests.post("http://localhost:7860/reset", json={"task_name": "simple_invoice"})
|
| 8 |
-
>>> obs = r.json()
|
| 9 |
-
"""
|
| 10 |
-
|
| 11 |
-
from .models import InvoiceAction, InvoiceObservation, InvoiceState
|
| 12 |
-
|
| 13 |
-
__all__ = [
|
| 14 |
-
"InvoiceAction",
|
| 15 |
-
"InvoiceObservation",
|
| 16 |
-
"InvoiceState",
|
| 17 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
openenv.yaml
CHANGED
|
@@ -2,5 +2,5 @@ spec_version: 1
|
|
| 2 |
name: invoice_extraction_env
|
| 3 |
type: space
|
| 4 |
runtime: fastapi
|
| 5 |
-
app:
|
| 6 |
port: 7860
|
|
|
|
| 2 |
name: invoice_extraction_env
|
| 3 |
type: space
|
| 4 |
runtime: fastapi
|
| 5 |
+
app: server.app:app
|
| 6 |
port: 7860
|
pyproject.toml
CHANGED
|
@@ -8,6 +8,7 @@ version = "0.1.0"
|
|
| 8 |
description = "Invoice Extraction Environment for OpenEnv — extract structured data from unstructured invoices"
|
| 9 |
requires-python = ">=3.10"
|
| 10 |
dependencies = [
|
|
|
|
| 11 |
"fastapi>=0.115.0",
|
| 12 |
"pydantic>=2.0.0",
|
| 13 |
"uvicorn[standard]>=0.24.0",
|
|
@@ -15,11 +16,13 @@ dependencies = [
|
|
| 15 |
"requests>=2.31.0",
|
| 16 |
]
|
| 17 |
|
|
|
|
|
|
|
|
|
|
| 18 |
[project.optional-dependencies]
|
| 19 |
dev = [
|
| 20 |
"pytest>=8.0.0",
|
| 21 |
-
"pytest-cov>=4.0.0",
|
| 22 |
]
|
| 23 |
|
| 24 |
[tool.setuptools]
|
| 25 |
-
packages = ["
|
|
|
|
| 8 |
description = "Invoice Extraction Environment for OpenEnv — extract structured data from unstructured invoices"
|
| 9 |
requires-python = ">=3.10"
|
| 10 |
dependencies = [
|
| 11 |
+
"openenv-core>=0.2.0",
|
| 12 |
"fastapi>=0.115.0",
|
| 13 |
"pydantic>=2.0.0",
|
| 14 |
"uvicorn[standard]>=0.24.0",
|
|
|
|
| 16 |
"requests>=2.31.0",
|
| 17 |
]
|
| 18 |
|
| 19 |
+
[project.scripts]
|
| 20 |
+
server = "server.app:start"
|
| 21 |
+
|
| 22 |
[project.optional-dependencies]
|
| 23 |
dev = [
|
| 24 |
"pytest>=8.0.0",
|
|
|
|
| 25 |
]
|
| 26 |
|
| 27 |
[tool.setuptools]
|
| 28 |
+
packages = ["server"]
|
requirements.txt
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
fastapi>=0.115.0
|
| 2 |
pydantic>=2.0.0
|
| 3 |
uvicorn[standard]>=0.24.0
|
|
|
|
| 1 |
+
openenv-core>=0.2.0
|
| 2 |
fastapi>=0.115.0
|
| 3 |
pydantic>=2.0.0
|
| 4 |
uvicorn[standard]>=0.24.0
|
{invoice_extraction_env/server → server}/__init__.py
RENAMED
|
File without changes
|
{invoice_extraction_env/server → server}/app.py
RENAMED
|
@@ -13,7 +13,7 @@ from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
|
| 13 |
from fastapi.responses import JSONResponse
|
| 14 |
from pydantic import BaseModel
|
| 15 |
|
| 16 |
-
from .
|
| 17 |
from .environment import InvoiceExtractionEnvironment
|
| 18 |
|
| 19 |
logger = logging.getLogger(__name__)
|
|
@@ -212,3 +212,9 @@ def create_invoice_app() -> FastAPI:
|
|
| 212 |
|
| 213 |
# Create the app instance
|
| 214 |
app = create_invoice_app()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
from fastapi.responses import JSONResponse
|
| 14 |
from pydantic import BaseModel
|
| 15 |
|
| 16 |
+
from .models import InvoiceAction, InvoiceObservation, InvoiceState
|
| 17 |
from .environment import InvoiceExtractionEnvironment
|
| 18 |
|
| 19 |
logger = logging.getLogger(__name__)
|
|
|
|
| 212 |
|
| 213 |
# Create the app instance
|
| 214 |
app = create_invoice_app()
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
def start():
|
| 218 |
+
"""Entry point for `uv run server` / `[project.scripts]`."""
|
| 219 |
+
import uvicorn
|
| 220 |
+
uvicorn.run("server.app:app", host="0.0.0.0", port=7860)
|
{invoice_extraction_env/server → server}/documents.py
RENAMED
|
File without changes
|
{invoice_extraction_env/server → server}/environment.py
RENAMED
|
@@ -10,7 +10,7 @@ import json
|
|
| 10 |
from typing import Any, Optional
|
| 11 |
from uuid import uuid4
|
| 12 |
|
| 13 |
-
from .
|
| 14 |
from .documents import get_document, TASK_REQUIRED_FIELDS
|
| 15 |
from .graders import grade_extraction
|
| 16 |
|
|
@@ -266,7 +266,6 @@ class InvoiceExtractionEnvironment:
|
|
| 266 |
)
|
| 267 |
|
| 268 |
if not done and score < 0.95:
|
| 269 |
-
# Give hints about which fields need improvement
|
| 270 |
weak_fields = [
|
| 271 |
name for name, data in feedback.items()
|
| 272 |
if not data.get("matched", False)
|
|
@@ -309,9 +308,9 @@ class InvoiceExtractionEnvironment:
|
|
| 309 |
lines = ["Detailed feedback on last extraction:\n"]
|
| 310 |
for field, data in self._last_feedback.items():
|
| 311 |
score = data.get("score", 0.0)
|
| 312 |
-
matched = "
|
| 313 |
field_type = data.get("expected_type", "unknown")
|
| 314 |
-
lines.append(f" {matched} {field} ({field_type}): {score:.2f}")
|
| 315 |
|
| 316 |
lines.append(f"\nOverall best score: {self._state.best_score:.2f}")
|
| 317 |
lines.append(f"Attempts remaining: {self._state.max_attempts - self._state.attempts_used}")
|
|
|
|
| 10 |
from typing import Any, Optional
|
| 11 |
from uuid import uuid4
|
| 12 |
|
| 13 |
+
from .models import InvoiceAction, InvoiceObservation, InvoiceState
|
| 14 |
from .documents import get_document, TASK_REQUIRED_FIELDS
|
| 15 |
from .graders import grade_extraction
|
| 16 |
|
|
|
|
| 266 |
)
|
| 267 |
|
| 268 |
if not done and score < 0.95:
|
|
|
|
| 269 |
weak_fields = [
|
| 270 |
name for name, data in feedback.items()
|
| 271 |
if not data.get("matched", False)
|
|
|
|
| 308 |
lines = ["Detailed feedback on last extraction:\n"]
|
| 309 |
for field, data in self._last_feedback.items():
|
| 310 |
score = data.get("score", 0.0)
|
| 311 |
+
matched = "Y" if data.get("matched", False) else "N"
|
| 312 |
field_type = data.get("expected_type", "unknown")
|
| 313 |
+
lines.append(f" [{matched}] {field} ({field_type}): {score:.2f}")
|
| 314 |
|
| 315 |
lines.append(f"\nOverall best score: {self._state.best_score:.2f}")
|
| 316 |
lines.append(f"Attempts remaining: {self._state.max_attempts - self._state.attempts_used}")
|
{invoice_extraction_env/server → server}/graders.py
RENAMED
|
File without changes
|
{invoice_extraction_env → server}/models.py
RENAMED
|
File without changes
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|