LeonardoMdSA committed on
Commit
b6e3994
·
1 Parent(s): 7cad4c7

logs and unit tests

Browse files
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: Context-aware NLP classification platform with MCP
3
  emoji: 🧠
4
- colorFrom: red
5
- colorTo: deep red
6
  sdk: docker
7
  app_file: Dockerfile
8
  pinned: false
 
1
  ---
2
  title: Context-aware NLP classification platform with MCP
3
  emoji: 🧠
4
+ colorFrom: indigo
5
+ colorTo: red
6
  sdk: docker
7
  app_file: Dockerfile
8
  pinned: false
app/classification/decision.py CHANGED
@@ -2,7 +2,12 @@ from typing import Any
2
  from dataclasses import dataclass
3
  from app.config import get_settings
4
 
 
 
 
 
5
  settings = get_settings()
 
6
 
7
 
8
  @dataclass
@@ -18,6 +23,7 @@ def classify_document(text: str, context: Any) -> ClassificationDecision:
18
  - Model prediction
19
  - Confidence threshold
20
  - Abstention logic
 
21
  """
22
 
23
  from app.classification.model import Classifier
@@ -30,8 +36,9 @@ def classify_document(text: str, context: Any) -> ClassificationDecision:
30
  else:
31
  context_dict = {}
32
 
33
- result = classifier.predict(text=text, context=context_dict)
34
 
 
35
 
36
  label = result.get("label")
37
  confidence = result.get("confidence", 0.0)
@@ -40,6 +47,21 @@ def classify_document(text: str, context: Any) -> ClassificationDecision:
40
  if settings.ENABLE_ABSTENTION and confidence < settings.CONFIDENCE_THRESHOLD:
41
  label = None
42
  abstained = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  return ClassificationDecision(
45
  label=label,
 
2
  from dataclasses import dataclass
3
  from app.config import get_settings
4
 
5
+ from app.logging.inference_log import log_inference
6
+ from app.logging.context_log import log_context_resolution # ← added
7
+ import logging
8
+
9
  settings = get_settings()
10
+ logger = logging.getLogger(__name__)
11
 
12
 
13
  @dataclass
 
23
  - Model prediction
24
  - Confidence threshold
25
  - Abstention logic
26
+ - Logs inference and context usage
27
  """
28
 
29
  from app.classification.model import Classifier
 
36
  else:
37
  context_dict = {}
38
 
39
+ logger.info("Classification request received", extra={"text": text[:100]})
40
 
41
+ result = classifier.predict(text=text, context=context_dict)
42
 
43
  label = result.get("label")
44
  confidence = result.get("confidence", 0.0)
 
47
  if settings.ENABLE_ABSTENTION and confidence < settings.CONFIDENCE_THRESHOLD:
48
  label = None
49
  abstained = True
50
+ logger.warning(
51
+ "Low confidence classification, abstaining", extra={"confidence": confidence}
52
+ )
53
+
54
+ # Log to persistent JSON files
55
+ log_inference(
56
+ label=label,
57
+ confidence=confidence,
58
+ abstained=abstained,
59
+ text=text,
60
+ context=context_dict,
61
+ )
62
+
63
+ # Minimal addition: log context usage
64
+ log_context_resolution(context=context_dict, text=text)
65
 
66
  return ClassificationDecision(
67
  label=label,
app/context/resolver.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.logging.context_log import log_context_resolution
2
+ import logging
3
+
4
+ logger = logging.getLogger(__name__)
5
+
6
+
7
+ def resolve_context(text: str, metadata: dict = None) -> dict:
8
+ """
9
+ Placeholder for context resolution logic.
10
+ Logs which context sources were used via JSON audit.
11
+ """
12
+ # Dummy context (replace with actual MCP context resolution)
13
+ context = {
14
+ "summary": f"Context for '{text[:50]}'",
15
+ "sources": ["mcp_server_1", "mcp_server_2"],
16
+ }
17
+
18
+ logger.info("Context resolution performed", extra={"text": text[:100]})
19
+
20
+ # Log JSON audit
21
+ log_context_resolution(context, text, metadata)
22
+
23
+ return context
app/logging_config.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ LOG_LEVEL = logging.INFO
6
+ LOG_FORMAT = "%(asctime)s | %(levelname)s | %(name)s | %(message)s"
7
+
8
+ def setup_logging():
9
+ handlers = []
10
+
11
+ # Always log to stdout (HF Spaces, Docker, CI)
12
+ stdout_handler = logging.StreamHandler(sys.stdout)
13
+ stdout_handler.setFormatter(logging.Formatter(LOG_FORMAT))
14
+ handlers.append(stdout_handler)
15
+
16
+ # Optional file logging (local only)
17
+ logs_dir = Path("logs")
18
+ try:
19
+ logs_dir.mkdir(exist_ok=True)
20
+ file_handler = logging.FileHandler(logs_dir / "app.log")
21
+ file_handler.setFormatter(logging.Formatter(LOG_FORMAT))
22
+ handlers.append(file_handler)
23
+ except Exception:
24
+ # Fail silently — file logs are non-critical
25
+ pass
26
+
27
+ logging.basicConfig(
28
+ level=LOG_LEVEL,
29
+ handlers=handlers,
30
+ force=True, # overrides uvicorn defaults
31
+ )
32
+
33
+ # Reduce noise from dependencies
34
+ logging.getLogger("uvicorn.access").setLevel(logging.WARNING)
35
+ logging.getLogger("httpx").setLevel(logging.WARNING)
app/main.py CHANGED
@@ -1,30 +1,50 @@
1
  from fastapi import FastAPI
2
  from fastapi.middleware.cors import CORSMiddleware
 
3
 
4
  from app.config import get_settings
5
  from app.api.routes import router as api_router
6
 
7
  # MCP (embedded mode)
8
- from app.orchestration.mcp_client import start_embedded_mcp_servers
9
- from app.orchestration.mcp_client import stop_embedded_mcp_servers
 
 
10
 
11
 
12
  def create_app() -> FastAPI:
13
  """
14
- Application factory.
15
-
16
- Ensures:
17
- - Deterministic startup
18
- - Clean separation of concerns
19
- - HF Spaces compatibility
20
  """
21
-
22
  settings = get_settings()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  app = FastAPI(
25
  title=settings.APP_NAME,
26
  debug=settings.DEBUG,
27
  version="0.1.0",
 
28
  )
29
 
30
  # -------------------------
@@ -43,33 +63,8 @@ def create_app() -> FastAPI:
43
  # -------------------------
44
  app.include_router(api_router)
45
 
46
- # -------------------------
47
- # Lifecycle Events
48
- # -------------------------
49
- @app.on_event("startup")
50
- async def on_startup() -> None:
51
- """
52
- Startup logic.
53
-
54
- In HF Spaces:
55
- - MCP servers are started embedded
56
-
57
- Locally:
58
- - MCP servers are external (docker-compose)
59
- """
60
- if settings.MCP_EMBEDDED:
61
- start_embedded_mcp_servers()
62
-
63
- @app.on_event("shutdown")
64
- async def on_shutdown() -> None:
65
- """
66
- Graceful shutdown.
67
- """
68
- if settings.MCP_EMBEDDED:
69
- stop_embedded_mcp_servers()
70
-
71
  return app
72
 
73
 
74
- # FastAPI entrypoint (required by HF Spaces and uvicorn)
75
  app = create_app()
 
1
  from fastapi import FastAPI
2
  from fastapi.middleware.cors import CORSMiddleware
3
+ from contextlib import asynccontextmanager
4
 
5
  from app.config import get_settings
6
  from app.api.routes import router as api_router
7
 
8
  # MCP (embedded mode)
9
+ from app.orchestration.mcp_client import start_embedded_mcp_servers, stop_embedded_mcp_servers
10
+
11
+ from app.logging_config import setup_logging
12
+ import logging
13
 
14
 
15
  def create_app() -> FastAPI:
16
  """
17
+ Application factory with lifespan handler for startup/shutdown.
 
 
 
 
 
18
  """
 
19
  settings = get_settings()
20
+
21
+ setup_logging()
22
+ logger = logging.getLogger(__name__)
23
+
24
+ @asynccontextmanager
25
+ async def lifespan(app: FastAPI):
26
+ """Handles startup and shutdown with lifespan."""
27
+ # Startup
28
+ logger.info("Application startup initiated")
29
+ if settings.MCP_EMBEDDED:
30
+ logger.info("Starting embedded MCP servers")
31
+ start_embedded_mcp_servers()
32
+ logger.info("Application startup complete")
33
+
34
+ yield # Control passes to FastAPI for request handling
35
+
36
+ # Shutdown
37
+ logger.info("Application shutdown initiated")
38
+ if settings.MCP_EMBEDDED:
39
+ logger.info("Stopping embedded MCP servers")
40
+ stop_embedded_mcp_servers()
41
+ logger.info("Application shutdown complete")
42
 
43
  app = FastAPI(
44
  title=settings.APP_NAME,
45
  debug=settings.DEBUG,
46
  version="0.1.0",
47
+ lifespan=lifespan, # ← attach lifespan handler
48
  )
49
 
50
  # -------------------------
 
63
  # -------------------------
64
  app.include_router(api_router)
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  return app
67
 
68
 
69
+ # FastAPI entrypoint
70
  app = create_app()
docs/TECH_DEBT.md CHANGED
@@ -1,7 +1,7 @@
1
  ## Pydantic / FastAPI deprecations
2
 
3
- - Migrate BaseSettings Config → ConfigDict
4
- - Replace Field(example=...) with json_schema_extra
5
- - Replace @app.on_event with lifespan handler
6
 
7
  Status: deferred (non-blocking)
 
1
  ## Pydantic / FastAPI deprecations
2
 
3
+ - Migrate BaseSettings `Config` → `ConfigDict`
4
+ - Replace `Field(example=...)` with `json_schema_extra`
5
+ - `datetime.utcnow()` usage in logging → switch to timezone-aware `datetime.now(timezone.utc)`
6
 
7
  Status: deferred (non-blocking)
tests/conftest.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from pathlib import Path
3
+
4
+ # Ensure root folder is in Python path so `import app` works
5
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
6
+
7
+ import pytest
8
+ from app.orchestration.mcp_client import start_embedded_mcp_servers, stop_embedded_mcp_servers
9
+
10
+ @pytest.fixture(scope="session", autouse=True)
11
+ def embedded_mcp_servers():
12
+ """
13
+ Starts embedded MCP servers for all tests that require MCP context.
14
+ Runs once per test session.
15
+ """
16
+ start_embedded_mcp_servers()
17
+ yield
18
+ stop_embedded_mcp_servers()
tests/test_classification.py CHANGED
@@ -1,9 +1,16 @@
1
  from app.classification.decision import classify_document
 
 
 
 
 
 
 
 
2
 
3
- def test_invoice_classification():
4
  result = classify_document(
5
  text="Invoice for Q4 2025 total amount $4,500",
6
- context={}
7
  )
8
 
9
  assert result.label == "finance.invoice"
@@ -11,10 +18,15 @@ def test_invoice_classification():
11
  assert result.abstained is False
12
 
13
 
14
- def test_abstention_logic():
 
 
 
 
 
15
  result = classify_document(
16
  text="Random unrelated text with no meaning",
17
- context={}
18
  )
19
 
20
  assert "confidence" in result.__dict__
 
1
  from app.classification.decision import classify_document
2
+ from app.orchestration.context_resolver import resolve_context
3
+
4
+ def test_invoice_classification(embedded_mcp_servers):
5
+ # Ensure MCP context is loaded
6
+ context = resolve_context(
7
+ text="Invoice for Q4 2025 total amount $4,500",
8
+ metadata={"department": "finance"}
9
+ )
10
 
 
11
  result = classify_document(
12
  text="Invoice for Q4 2025 total amount $4,500",
13
+ context=context
14
  )
15
 
16
  assert result.label == "finance.invoice"
 
18
  assert result.abstained is False
19
 
20
 
21
+ def test_abstention_logic(embedded_mcp_servers):
22
+ context = resolve_context(
23
+ text="Random unrelated text with no meaning",
24
+ metadata={}
25
+ )
26
+
27
  result = classify_document(
28
  text="Random unrelated text with no meaning",
29
+ context=context
30
  )
31
 
32
  assert "confidence" in result.__dict__
tests/test_context_resolution.py CHANGED
@@ -1,6 +1,6 @@
1
  from app.orchestration.context_resolver import resolve_context
2
 
3
- def test_context_resolution_embedded():
4
  context = resolve_context(
5
  text="Invoice for Q4 2025",
6
  metadata={"department": "finance"}
 
1
  from app.orchestration.context_resolver import resolve_context
2
 
3
+ def test_context_resolution_embedded(embedded_mcp_servers):
4
  context = resolve_context(
5
  text="Invoice for Q4 2025",
6
  metadata={"department": "finance"}
tests/test_mcp_servers.py CHANGED
@@ -1,7 +1,7 @@
1
  from app.orchestration.context_resolver import resolve_context
2
  from app.config import get_settings
3
 
4
- def test_mcp_fail_fast_disabled():
5
  settings = get_settings()
6
  settings.MCP_FAIL_FAST = False
7
 
 
1
  from app.orchestration.context_resolver import resolve_context
2
  from app.config import get_settings
3
 
4
+ def test_mcp_fail_fast_disabled(embedded_mcp_servers):
5
  settings = get_settings()
6
  settings.MCP_FAIL_FAST = False
7
 
ui/streamlit_app.py CHANGED
@@ -1,6 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import requests
3
- from pathlib import Path
4
  from app.config import get_settings
5
 
6
  settings = get_settings()
 
1
+ import sys
2
+ from pathlib import Path
3
+
4
+ # -------------------------
5
+ # Ensure 'app' folder is importable
6
+ # -------------------------
7
+ PROJECT_ROOT = Path(__file__).resolve().parent.parent
8
+ if str(PROJECT_ROOT) not in sys.path:
9
+ sys.path.append(str(PROJECT_ROOT))
10
+
11
+ # -------------------------
12
+ # Imports
13
+ # -------------------------
14
  import streamlit as st
15
  import requests
 
16
  from app.config import get_settings
17
 
18
  settings = get_settings()