Dipan04 committed on
Commit
2c41dce
·
0 Parent(s):

Initial clean commit for Hugging Face Space

Browse files
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Local secrets / environment configuration
.env

# Python bytecode and caches
__pycache__/
*.pyc
*.pyo
*.pyd

# Virtual environments
env/
venv/
ENV/
env.bak/

# Local test artifact.
# The original file contained the literal shell commands
# "echo test1.png >> .gitignore" and "git add .gitignore" as patterns,
# which ignored nothing useful; the intended pattern is below.
test1.png
Dockerfile ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Single-stage image on the slim Python runtime
# (the stage is named "base" so later stages can build on it if needed)
FROM python:3.11-slim AS base

# Set environment variables:
#   - unbuffered stdout/stderr so logs show up immediately
#   - no .pyc files inside the image
#   - no pip cache / version check to keep layers small and builds quiet
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Install system dependencies
# Tesseract is optional but included for OCR support
RUN apt-get update && apt-get install -y --no-install-recommends \
        tesseract-ocr \
        tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*

# Create app directory
WORKDIR /app

# Copy requirements first for better caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create non-root user for security
RUN useradd -m -u 1000 appuser && \
    chown -R appuser:appuser /app

USER appuser

# Expose port
EXPOSE 8000

# Health check
# Uses only the standard library (no dependency on `requests` being installed).
# urlopen raises on connection errors and on HTTP 4xx/5xx responses, so an
# unhealthy app makes the probe exit non-zero.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=5)" || exit 1

# Run application
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
agents/hashing_agent.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hashing Agent
3
+ Generates cryptographic hashes for content verification.
4
+ """
5
+
6
+ from typing import Dict, Any
7
+ import hashlib
8
+ from datetime import datetime, timezone
9
+
10
+ from core.agent_base import Agent
11
+ from core.errors import HashingError
12
+ from config.settings import settings
13
+
14
+
15
class HashingAgent(Agent):
    """
    Agent that fingerprints raw content with a cryptographic digest.

    The digest algorithm is taken from ``settings.HASH_ALGORITHM``.
    """

    def execute(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Hash the ``content`` entry of ``input_data``.

        Args:
            input_data: Mapping that must contain ``"content"`` as non-empty
                ``bytes``; any other entries are carried over untouched.

        Returns:
            A new dict with every entry of ``input_data`` plus
            ``"content_hash"`` (hex digest), ``"hash_algorithm"`` and
            ``"hash_timestamp"`` (UTC, ISO 8601).

        Raises:
            HashingError: If the content is absent/empty, is not ``bytes``,
                or the digest cannot be computed.
        """
        payload = input_data.get("content")

        # Guard clauses: falsy payloads are reported as missing, then the
        # type is checked.
        if not payload:
            raise HashingError("Missing 'content' field")
        if not isinstance(payload, bytes):
            raise HashingError("Content must be bytes")

        algo_name = settings.HASH_ALGORITHM
        digest = self._compute_hash(payload, algo_name)

        # Build the output in one expression; the caller's dict is not mutated.
        return {
            **input_data,
            "content_hash": digest,
            "hash_algorithm": algo_name,
            "hash_timestamp": datetime.now(timezone.utc).isoformat(),
        }

    def _compute_hash(self, content: bytes, algorithm: str) -> str:
        """
        Return the hexadecimal digest of ``content``.

        Args:
            content: Bytes to digest.
            algorithm: Name understood by ``hashlib.new`` (e.g. ``'sha256'``).

        Raises:
            HashingError: If ``hashlib`` rejects the algorithm name
                (``ValueError``) or anything else goes wrong while hashing.
        """
        try:
            digest_obj = hashlib.new(algorithm)
            digest_obj.update(content)
            hex_digest = digest_obj.hexdigest()
        except ValueError as unsupported:
            raise HashingError(f"Unsupported hash algorithm: {algorithm}") from unsupported
        except Exception as failure:
            raise HashingError(f"Hash computation failed: {str(failure)}") from failure
        return hex_digest
agents/input_validator.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Input Validation Agent
3
+ Validates and normalizes input data (file or text) into a standard format.
4
+ """
5
+
6
+ from typing import Dict, Any, Optional
7
+ import mimetypes
8
+ from pathlib import Path
9
+
10
+ from core.agent_base import Agent
11
+ from core.errors import ValidationError
12
+ from config.settings import settings
13
+
14
+
15
class InputValidatorAgent(Agent):
    """
    Validates input type, size, format and normalizes to internal format.
    """

    def execute(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate and normalize input.

        Expected input_data:
            {
                "type": "file" | "text",
                "content": bytes | str,
                "filename": str (optional, for files),
            }

        Returns:
            {
                "content": bytes,
                "content_type": str,
                "size": int,
                "filename": str | None,
                "validation_status": "valid"
            }

        Raises:
            ValidationError: If a required field is absent, the type is
                unknown, or the content fails the type/emptiness/size checks.
        """
        input_type = input_data.get("type")
        content = input_data.get("content")

        # Only an absent payload counts as "missing". Empty bytes/strings are
        # passed on so the type-specific validators can report the precise
        # "File is empty" / "Text is empty" error instead.
        if not input_type or content is None:
            raise ValidationError("Missing required fields: 'type' and 'content'")

        if input_type == "file":
            return self._validate_file(content, input_data.get("filename"))
        if input_type == "text":
            return self._validate_text(content)
        raise ValidationError(f"Invalid input type: {input_type}")

    def _check_size(self, size: int, label: str) -> None:
        """Raise ValidationError when ``size`` exceeds the configured maximum."""
        max_size = settings.get_max_file_size_bytes()
        if size > max_size:
            raise ValidationError(
                f"{label} size {size} bytes exceeds maximum {max_size} bytes"
            )

    def _validate_file(self, content: bytes, filename: Optional[str]) -> Dict[str, Any]:
        """
        Validate file input.

        The content type is guessed from the filename extension only (the
        bytes themselves are not inspected); it falls back to
        ``application/octet-stream`` when no filename is given or the
        extension is unknown.
        """
        if not isinstance(content, bytes):
            raise ValidationError("File content must be bytes")

        if not content:
            raise ValidationError("File is empty")

        size = len(content)
        self._check_size(size, "File")

        # Detect content type
        content_type = "application/octet-stream"
        if filename:
            guessed_type, _ = mimetypes.guess_type(filename)
            if guessed_type:
                content_type = guessed_type

        return {
            "content": content,
            "content_type": content_type,
            "size": size,
            "filename": filename,
            "validation_status": "valid"
        }

    def _validate_text(self, content: str) -> Dict[str, Any]:
        """
        Validate text input and normalize it to UTF-8 bytes.

        Whitespace-only text is rejected as empty. The size limit is applied
        to the encoded byte length, not the character count.
        """
        if not isinstance(content, str):
            raise ValidationError("Text content must be string")

        if not content.strip():
            raise ValidationError("Text is empty")

        # Convert to bytes for consistent handling
        content_bytes = content.encode('utf-8')
        size = len(content_bytes)
        self._check_size(size, "Text")

        return {
            "content": content_bytes,
            "content_type": "text/plain",
            "size": size,
            "filename": None,
            "validation_status": "valid"
        }
agents/metadata_agent.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Metadata Agent
3
+ Generates structured metadata for proof objects.
4
+ """
5
+
6
+ from typing import Dict, Any
7
+ from datetime import datetime, timezone
8
+
9
+ from core.agent_base import Agent
10
+ from core.errors import MetadataError
11
+
12
+
13
class MetadataAgent(Agent):
    """
    Assembles the metadata record that accompanies a proof.
    """

    def execute(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Derive a ``metadata`` dict from the pipeline state.

        Reads from ``input_data``: ``content_type``, ``size``, ``filename``,
        ``content_hash``, ``hash_algorithm`` and ``validation_status``.

        Returns:
            A copy of ``input_data`` with an added ``"metadata"`` entry holding
            ``content_type``, ``content_size``, ``filename``,
            ``hash_reference``, ``hash_algorithm``, ``created_at`` (UTC,
            ISO 8601) and ``validation_status``.

        Raises:
            MetadataError: If ``content_type``, ``content_size`` or
                ``hash_reference`` would be ``None``, or if anything
                unexpected fails while building the record.
        """
        try:
            fetch = input_data.get
            metadata = {
                "content_type": fetch("content_type"),
                "content_size": fetch("size"),
                "filename": fetch("filename"),
                "hash_reference": fetch("content_hash"),
                "hash_algorithm": fetch("hash_algorithm"),
                "created_at": datetime.now(timezone.utc).isoformat(),
                "validation_status": fetch("validation_status"),
            }

            # These three entries must be present for a usable record.
            missing = [
                name
                for name in ("content_type", "content_size", "hash_reference")
                if metadata[name] is None
            ]
            if missing:
                raise MetadataError(f"Missing required fields: {', '.join(missing)}")

            return {**input_data, "metadata": metadata}

        except MetadataError:
            # Already the right error type: let it through unchanged.
            raise
        except Exception as exc:
            raise MetadataError(f"Metadata generation failed: {str(exc)}") from exc
agents/proof_builder.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Proof Builder Agent
3
+ Assembles final proof object from validated components.
4
+ """
5
+
6
+ from typing import Dict, Any
7
+ import uuid
8
+
9
+ from core.agent_base import Agent
10
+ from core.errors import ProofSystemError
11
+ from models.proof import Proof
12
+
13
+
14
class ProofBuilderAgent(Agent):
    """
    Turns the accumulated pipeline state into a ``Proof`` instance.
    """

    def execute(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Assemble the final proof.

        Reads from ``input_data``: ``content_hash``, ``hash_algorithm``,
        ``validation_status``, ``metadata`` (dict; its ``content_type``,
        ``content_size`` and ``created_at`` entries are used) and the OCR
        fields ``extracted_text``, ``ocr_engine`` and ``ocr_status``.

        Returns:
            ``{"proof": Proof, "proof_id": str}``; the rest of ``input_data``
            is not passed through.

        Raises:
            ProofSystemError: If any step of building the proof fails.
        """
        try:
            new_id = self._generate_proof_id()
            meta = input_data.get("metadata", {})

            # Collect constructor arguments first, then build the object.
            proof_fields = {
                "proof_id": new_id,
                "content_hash": input_data.get("content_hash"),
                "hash_algorithm": input_data.get("hash_algorithm"),
                "content_type": meta.get("content_type"),
                "content_size": meta.get("content_size"),
                "timestamp": meta.get("created_at"),
                "validation_status": input_data.get("validation_status"),
                "metadata": meta,
                "extracted_text": input_data.get("extracted_text"),
                "ocr_engine": input_data.get("ocr_engine"),
                "ocr_status": input_data.get("ocr_status"),
            }
            built = Proof(**proof_fields)

            return {"proof": built, "proof_id": new_id}

        except Exception as exc:
            raise ProofSystemError(f"Proof building failed: {str(exc)}") from exc

    def _generate_proof_id(self) -> str:
        """Return a fresh random (version 4) UUID rendered as a string."""
        fresh_uuid = uuid.uuid4()
        return str(fresh_uuid)
agents/storage_agent.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Storage Agent
3
+ Abstract storage interface with Supabase implementation stub.
4
+ """
5
+
6
+ from typing import Dict, Any, Optional
7
+ from abc import abstractmethod
8
+
9
+ from core.agent_base import Agent
10
+ from core.errors import StorageError, ProofNotFoundError
11
+ from models.proof import Proof
12
+ from config.settings import settings
13
+
14
+
15
class StorageAgent(Agent):
    """
    Storage contract for persisting and loading proofs.

    Subclasses supply ``save_proof`` and ``get_proof``; ``execute`` routes an
    action dict to one of them.
    """

    @abstractmethod
    def save_proof(self, proof: Proof) -> Dict[str, Any]:
        """
        Persist ``proof``.

        Args:
            proof: Proof object to save.

        Returns:
            Storage response with proof_id and status.
        """

    @abstractmethod
    def get_proof(self, proof_id: str) -> Optional[Proof]:
        """
        Load a proof by its identifier.

        Args:
            proof_id: Unique proof identifier.

        Returns:
            The Proof if found, otherwise None.
        """

    def execute(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Dispatch a storage request.

        Expected input_data:
            {
                "action": "save" | "get",
                "proof": Proof (for save),
                "proof_id": str (for get)
            }

        Returns:
            For "save": whatever ``save_proof`` returns.
            For "get": ``{"proof": Proof}``.

        Raises:
            StorageError: Unknown action, or the argument the action needs
                is missing.
            ProofNotFoundError: "get" was requested and ``get_proof``
                returned nothing.
        """
        action = input_data.get("action")

        # Reject anything that is not a known action up front.
        if action not in ("save", "get"):
            raise StorageError(f"Invalid action: {action}")

        if action == "save":
            candidate = input_data.get("proof")
            if not candidate:
                raise StorageError("Missing 'proof' for save action")
            return self.save_proof(candidate)

        # Remaining case: action == "get"
        wanted_id = input_data.get("proof_id")
        if not wanted_id:
            raise StorageError("Missing 'proof_id' for get action")

        found = self.get_proof(wanted_id)
        if not found:
            raise ProofNotFoundError(f"Proof not found: {wanted_id}")
        return {"proof": found}
76
+
77
+
78
class SupabaseStorageAgent(StorageAgent):
    """
    Storage agent intended to be backed by Supabase.

    At present no Supabase client is created: proofs are kept in a
    per-instance in-memory dict (``_memory_store``), so they do not outlive
    the instance. Credentials come from settings/environment variables and
    are never hardcoded.
    """

    def __init__(self):
        super().__init__()
        self._client = None
        self._init_client()

    def _init_client(self):
        """
        Check configuration and set up the backing store.

        Stub: the real supabase-py client is not created yet; an in-memory
        dict is used instead.

        Raises:
            StorageError: If ``settings.validate()`` reports that the
                configuration is incomplete.
        """
        configured = settings.validate()
        if not configured:
            raise StorageError(
                "Supabase credentials not configured. "
                "Set SUPABASE_URL and SUPABASE_KEY environment variables."
            )

        # TODO: Initialize actual Supabase client when library is added
        # from supabase import create_client
        # self._client = create_client(settings.SUPABASE_URL, settings.SUPABASE_KEY)

        # For now, using in-memory storage for testing
        self._memory_store = {}

    def save_proof(self, proof: Proof) -> Dict[str, Any]:
        """
        Store the dict form of ``proof`` under its ``proof_id``.

        Returns:
            ``{"success": True, "proof_id": ..., "message": ...}``.

        Raises:
            StorageError: If serializing or storing the proof fails.
        """
        try:
            # TODO: Replace with actual Supabase insert
            # response = self._client.table(settings.SUPABASE_TABLE).insert(proof_data).execute()

            # In-memory stub
            self._memory_store[proof.proof_id] = proof.to_dict()

            receipt = {
                "success": True,
                "proof_id": proof.proof_id,
                "message": "Proof saved successfully"
            }
            return receipt

        except Exception as exc:
            raise StorageError(f"Failed to save proof: {str(exc)}") from exc

    def get_proof(self, proof_id: str) -> Optional[Proof]:
        """
        Rebuild the proof stored under ``proof_id``.

        Returns:
            The Proof reconstructed via ``Proof.from_dict``, or None when
            nothing is stored for that id.

        Raises:
            StorageError: If lookup or reconstruction fails.
        """
        try:
            # TODO: Replace with actual Supabase query
            # response = self._client.table(settings.SUPABASE_TABLE).select("*").eq("proof_id", proof_id).execute()

            # In-memory stub
            stored = self._memory_store.get(proof_id)
            return Proof.from_dict(stored) if stored else None

        except Exception as exc:
            raise StorageError(f"Failed to retrieve proof: {str(exc)}") from exc
agents/text_extraction_agent.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Text Extraction Agent (OCR)
3
+ Deterministic preprocessing agent for extracting text from images (PDF input is not handled yet).
4
+ Uses Tesseract OCR - a deterministic, non-AI algorithm.
5
+ """
6
+
7
+ from typing import Dict, Any, Optional
8
+ import io
9
+ import logging
10
+
11
+ from core.agent_base import Agent
12
+ from core.errors import (
13
+ OCRNotApplicableError,
14
+ OCRProcessingError,
15
+ OCRDependencyMissingError
16
+ )
17
+ from config.settings import settings
18
+
19
+ # Optional imports - OCR is skipped gracefully when these packages are missing
20
+ try:
21
+ import pytesseract
22
+ from PIL import Image
23
+ TESSERACT_AVAILABLE = True
24
+ except ImportError:
25
+ TESSERACT_AVAILABLE = False
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
class TextExtractionAgent(Agent):
    """
    Extracts text from images using Tesseract OCR.
    This is a deterministic preprocessing step, not AI.

    Only the MIME types in ``OCR_SUPPORTED_TYPES`` are processed; every other
    content type (including PDFs) is passed through with
    ``ocr_status="skipped"``. OCR problems never abort the pipeline: they are
    reported through ``ocr_status="failed"`` and ``ocr_error``.
    """

    # Content types that support OCR
    OCR_SUPPORTED_TYPES = {
        "image/png",
        "image/jpeg",
        "image/jpg",
        "image/tiff",
        "image/bmp",
        "image/gif",
    }

    def __init__(self):
        super().__init__()
        self._check_dependencies()

    def _check_dependencies(self):
        """
        Log whether the OCR stack is usable.

        This only logs; it never raises and does not change how ``execute``
        behaves.
        """
        if not TESSERACT_AVAILABLE:
            logger.warning(
                "Tesseract dependencies not available. "
                "Install with: pip install pytesseract pillow"
            )
            return

        try:
            # Verify Tesseract binary is accessible
            pytesseract.get_tesseract_version()
            logger.info("Tesseract OCR is available and ready")
        except Exception as e:
            # With the Python packages present but the binary missing,
            # execute() still attempts OCR and reports ocr_status="failed",
            # so the message says "fail" rather than "skipped".
            logger.warning(
                f"Tesseract binary not found in PATH: {str(e)}. "
                "OCR will fail for all inputs until Tesseract is installed."
            )

    def execute(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Extract text from content if applicable.

        Expected input_data:
            {
                "content": bytes,
                "content_type": str,
                "size": int,
                ...other fields from validation...
            }

        Returns:
            {
                "extracted_text": str | None,
                "ocr_engine": str | None,
                "ocr_status": "success" | "skipped" | "failed",
                "ocr_confidence": float | None,  # Future enhancement
                "ocr_error": str,                # only when status is "failed"
                ...passes through input_data...
            }
        """
        # Skip if OCR is globally disabled
        if not settings.OCR_ENABLED:
            logger.debug("OCR is disabled in settings")
            return self._skip_ocr(input_data, "disabled")

        # Check if dependencies are available
        if not TESSERACT_AVAILABLE:
            logger.debug("OCR dependencies not available")
            return self._skip_ocr(input_data, "dependencies_missing")

        content_type = input_data.get("content_type", "")
        content = input_data.get("content")

        # Check if content type supports OCR
        if not self._is_ocr_applicable(content_type):
            logger.debug(f"OCR not applicable for content type: {content_type}")
            return self._skip_ocr(input_data, "not_applicable")

        # Attempt OCR extraction; any failure degrades to a "failed" status
        # instead of propagating, so proof generation can continue.
        try:
            extracted_text = self._extract_text(content, content_type)

            result = input_data.copy()
            result.update({
                "extracted_text": extracted_text,
                "ocr_engine": "tesseract",
                "ocr_status": "success",
                "ocr_confidence": None,  # Tesseract confidence available but not used in MVP
            })

            logger.info(
                f"OCR successful: extracted {len(extracted_text)} characters"
            )
            return result

        except Exception as e:
            logger.error(f"OCR processing failed: {str(e)}")
            return self._skip_ocr(input_data, "failed", error=str(e))

    def _is_ocr_applicable(self, content_type: str) -> bool:
        """
        Check if OCR is applicable for this content type.

        Args:
            content_type: MIME type of the content (compared case-insensitively)

        Returns:
            True if OCR should be attempted
        """
        return content_type.lower() in self.OCR_SUPPORTED_TYPES

    def _extract_text(self, content: bytes, content_type: str) -> str:
        """
        Extract text using Tesseract OCR.

        Args:
            content: Image bytes
            content_type: MIME type (currently unused by the extraction itself)

        Returns:
            Extracted text with surrounding whitespace stripped; may be an
            empty string when nothing was recognized.

        Raises:
            OCRDependencyMissingError: If the Tesseract binary cannot be found
            OCRProcessingError: If extraction fails for any other reason
        """
        try:
            # Decode the bytes as an image; the context manager releases the
            # image's resources as soon as OCR is done.
            with Image.open(io.BytesIO(content)) as image:
                # Perform OCR with configured language
                text = pytesseract.image_to_string(
                    image,
                    lang=settings.OCR_LANGUAGE,
                    config='--psm 3'  # Fully automatic page segmentation
                )

            # Clean up extracted text
            text = text.strip()

            if not text:
                logger.warning("OCR completed but no text was extracted")

            return text

        except pytesseract.TesseractNotFoundError as e:
            raise OCRDependencyMissingError(
                "Tesseract binary not found. Please install Tesseract OCR."
            ) from e
        except Exception as e:
            raise OCRProcessingError(
                f"Text extraction failed: {str(e)}"
            ) from e

    def _skip_ocr(
        self,
        input_data: Dict[str, Any],
        reason: str,
        error: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Return input data with OCR skipped.

        Args:
            input_data: Original input data (not mutated)
            reason: Reason for skipping; the value "failed" yields
                ocr_status "failed", anything else yields "skipped"
            error: Optional error message, stored under "ocr_error"

        Returns:
            Copy of the input data with OCR status = skipped/failed
        """
        result = input_data.copy()

        ocr_status = "skipped" if reason != "failed" else "failed"

        result.update({
            "extracted_text": None,
            "ocr_engine": None,
            "ocr_status": ocr_status,
            "ocr_confidence": None,
        })

        if error:
            result["ocr_error"] = error

        return result
agents/verification_agent.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Verification Agent
3
+ Verifies proof authenticity by recomputing and comparing hashes.
4
+ """
5
+
6
+ from typing import Dict, Any
7
+ import hashlib
8
+ from datetime import datetime, timezone
9
+
10
+ from core.agent_base import Agent
11
+ from core.errors import VerificationError
12
+ from models.proof import VerificationResult, Proof
13
+ from agents.storage_agent import StorageAgent
14
+
15
+
16
class VerificationAgent(Agent):
    """
    Checks supplied content against a stored proof by re-hashing it.
    """

    def __init__(self, storage_agent: StorageAgent):
        super().__init__()
        self.storage_agent = storage_agent

    def execute(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Verify that content matches a stored proof.

        Expected input_data:
            {
                "proof_id": str,
                "content": bytes  # Original content to verify
            }

        Returns:
            ``{"verification_result": VerificationResult}``; the result's
            ``is_valid`` is True only when the recomputed digest equals the
            stored one.

        Raises:
            VerificationError: If ``proof_id`` or ``content`` is missing, the
                proof is not in storage, or hashing fails.
        """
        requested_id = input_data.get("proof_id")
        candidate = input_data.get("content")

        if not requested_id:
            raise VerificationError("Missing 'proof_id'")
        if not candidate:
            raise VerificationError("Missing 'content' to verify")

        # Look up the stored proof.
        stored = self.storage_agent.get_proof(requested_id)
        if not stored:
            raise VerificationError(f"Proof not found: {requested_id}")

        # Re-hash with the algorithm recorded on the proof, then compare.
        fresh_digest = self._compute_hash(candidate, stored.hash_algorithm)
        matches = fresh_digest == stored.content_hash

        if matches:
            verdict = "Hash match: proof is valid"
        else:
            verdict = "Hash mismatch: proof is invalid"

        outcome = VerificationResult(
            proof_id=requested_id,
            is_valid=matches,
            original_hash=stored.content_hash,
            computed_hash=fresh_digest,
            timestamp=datetime.now(timezone.utc).isoformat(),
            message=verdict
        )
        return {"verification_result": outcome}

    def _compute_hash(self, content: bytes, algorithm: str) -> str:
        """
        Return the hexadecimal digest of ``content``.

        Args:
            content: Bytes to digest.
            algorithm: Name understood by ``hashlib.new``.

        Raises:
            VerificationError: On any failure while hashing.
        """
        try:
            digest_obj = hashlib.new(algorithm)
            digest_obj.update(content)
            hex_digest = digest_obj.hexdigest()
        except Exception as exc:
            raise VerificationError(f"Hash computation failed: {str(exc)}") from exc
        return hex_digest
app.py ADDED
@@ -0,0 +1,459 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FastAPI Application - Thin HTTP translation layer
3
+ Stateless server that delegates all business logic to the orchestrator.
4
+
5
+ Design principles:
6
+ - No business logic in this file
7
+ - Orchestrator handles all operations
8
+ - Clean error handling with typed exceptions
9
+ - Graceful degradation for optional features (OCR, AI)
10
+ """
11
+
12
+ from fastapi import FastAPI, File, UploadFile, HTTPException, Form
13
+ from fastapi.responses import JSONResponse
14
+ from fastapi.middleware.cors import CORSMiddleware
15
+ from pydantic import BaseModel, Field
16
+ from typing import Optional
17
+ import logging
18
+ from contextlib import asynccontextmanager
19
+ import os
20
+
21
+ # Load environment variables
22
+ from dotenv import load_dotenv
23
+ load_dotenv()
24
+
25
+ from core.orchestrator import Orchestrator
26
+ from core.errors import (
27
+ ProofSystemError,
28
+ ValidationError,
29
+ ProofNotFoundError,
30
+ OCRError
31
+ )
32
+ from config.settings import settings
33
+
34
+ # Configure logging
35
# Root logger setup: INFO level; each record shows timestamp, logger name,
# level and message.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger used throughout this file.
logger = logging.getLogger(__name__)


# ============================================================================
# APPLICATION LIFECYCLE
# ============================================================================

# Global orchestrator instance (initialized at startup)
# Remains None until the lifespan handler assigns it.
orchestrator: Optional[Orchestrator] = None
48
+
49
+
50
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Startup/shutdown hook for the FastAPI application.

    On startup it logs the feature flags and creates the module-level
    ``orchestrator``; a failure there is logged and re-raised. Control is then
    yielded to the application, and a message is logged on shutdown.
    """
    global orchestrator

    logger.info("Starting Proof-of-Existence API")
    logger.info(f"OCR Enabled: {settings.OCR_ENABLED}")
    logger.info(f"AI Enabled: {settings.AI_ENABLED}")

    # Build the orchestrator that every endpoint delegates to.
    try:
        orchestrator = Orchestrator()
    except Exception as startup_error:
        logger.error(f"Failed to initialize orchestrator: {str(startup_error)}")
        raise
    else:
        logger.info("Orchestrator initialized successfully")

    yield

    # Nothing to release at the moment; just record the shutdown.
    logger.info("Shutting down Proof-of-Existence API")
74
+
75
+
76
+ # ============================================================================
77
+ # FASTAPI APP INITIALIZATION
78
+ # ============================================================================
79
+
80
app = FastAPI(
    title="Proof-of-Existence API",
    description="Deterministic proof generation and verification with optional OCR and AI assistance",
    version="0.4.0",
    lifespan=lifespan
)

# CORS configuration.
# Allowed origins come from the CORS_ALLOW_ORIGINS environment variable
# (comma-separated list); when it is unset or empty, any origin is allowed.
_cors_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    # A wildcard origin must not be combined with credentials: that would let
    # any site issue credentialed cross-origin requests. Credentials are
    # therefore enabled only when explicit origins are configured.
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
95
+
96
+
97
+ # ============================================================================
98
+ # REQUEST/RESPONSE MODELS
99
+ # ============================================================================
100
+
101
class TextProofRequest(BaseModel):
    """Request model for creating proof from text content."""
    # Required: the text a proof should be created for.
    content: str = Field(..., description="Text content to create proof for")
    # Optional dict supplied by the client; None when omitted.
    metadata: Optional[dict] = Field(None, description="Optional metadata")
105
+
106
+
107
class VerifyProofRequest(BaseModel):
    """Request model for verifying a proof."""
    # Required: identifier of the proof to check against.
    proof_id: str = Field(..., description="Unique proof identifier")
    # Required: content to compare with the proof. It arrives as text.
    # NOTE(review): presumably encoded to bytes before hashing — confirm in the endpoint.
    content: str = Field(..., description="Content to verify against proof")
111
+
112
+
113
class AssistantRequest(BaseModel):
    """Request model for AI assistant queries."""
    # Required: the user's question.
    question: str = Field(..., description="Question to ask the AI assistant")
    # Optional: proof to use as context; None when omitted.
    proof_id: Optional[str] = Field(None, description="Optional proof ID for context")
117
+
118
+
119
class ProofResponse(BaseModel):
    """Standard proof creation response."""
    # Only `success` and `message` are required; the other fields default to None.
    success: bool
    proof_id: Optional[str] = None
    hash: Optional[str] = None  # NOTE(review): presumably the proof's content hash — confirm
    timestamp: Optional[str] = None
    message: str
    assistant: Optional[dict] = None  # NOTE(review): presumably optional AI assistant output — confirm
127
+
128
+
129
class VerificationResponse(BaseModel):
    """Standard verification response."""
    # Only `success` and `message` are required; the other fields default to None.
    success: bool
    is_valid: Optional[bool] = None
    message: str
    assistant: Optional[dict] = None  # NOTE(review): presumably optional AI assistant output — confirm
135
+
136
+
137
+ # ============================================================================
138
+ # ERROR HANDLERS
139
+ # ============================================================================
140
+
141
@app.exception_handler(ValidationError)
async def validation_error_handler(request, exc: ValidationError):
    """Turn a ``ValidationError`` into an HTTP 400 JSON error body."""
    payload = {
        "success": False,
        "error": "validation_error",
        "message": str(exc),
    }
    return JSONResponse(status_code=400, content=payload)
152
+
153
+
154
@app.exception_handler(ProofNotFoundError)
async def proof_not_found_handler(request, exc: ProofNotFoundError):
    """Turn a ``ProofNotFoundError`` into an HTTP 404 JSON error body."""
    payload = {
        "success": False,
        "error": "proof_not_found",
        "message": str(exc),
    }
    return JSONResponse(status_code=404, content=payload)
165
+
166
+
167
@app.exception_handler(ProofSystemError)
async def proof_system_error_handler(request, exc: ProofSystemError):
    """Log a ``ProofSystemError`` and answer with an HTTP 500 JSON error body."""
    detail = str(exc)
    logger.error(f"Proof system error: {detail}")
    payload = {
        "success": False,
        "error": "system_error",
        "message": detail,
    }
    return JSONResponse(status_code=500, content=payload)
179
+
180
+
181
@app.exception_handler(Exception)
async def general_exception_handler(request, exc: Exception):
    """Catch-all handler: log the error and answer with a generic HTTP 500 body.

    The exception text goes to the log only; the client receives a fixed
    message.
    """
    logger.error(f"Unexpected error: {str(exc)}", exc_info=True)
    payload = {
        "success": False,
        "error": "internal_error",
        "message": "An unexpected error occurred",
    }
    return JSONResponse(status_code=500, content=payload)
193
+
194
+
195
+ # ============================================================================
196
+ # HEALTH CHECK
197
+ # ============================================================================
198
+
199
@app.get("/health")
async def health_check():
    """
    Report service liveness and which optional features are switched on.

    The AI assistant is reported as available only when an orchestrator
    exists, ``settings.AI_ENABLED`` is set and the orchestrator's sidecar
    reports itself enabled.
    """
    if orchestrator:
        ai_available = settings.AI_ENABLED and orchestrator.ai_sidecar.enabled
    else:
        ai_available = False

    feature_flags = {
        "ocr": settings.OCR_ENABLED,
        "ai_assistant": ai_available,
    }
    return {
        "status": "healthy",
        "version": "0.4.0",
        "features": feature_flags,
    }
213
+
214
+
215
+ # ============================================================================
216
+ # PROOF ENDPOINTS
217
+ # ============================================================================
218
+
219
@app.post("/proof/create/text", response_model=ProofResponse)
async def create_proof_from_text(request: TextProofRequest):
    """
    Create a cryptographic proof from text content.

    The text is handed to the orchestrator's proof-creation pipeline; the
    resulting proof's ID, hash and timestamp are returned together with the
    AI explanation when the orchestrator attached one.

    Note: ``request.metadata`` is accepted by the request model but is not
    forwarded to the orchestrator by this endpoint.

    Returns:
        ProofResponse with proof ID, hash, timestamp and optional AI explanation.

    Raises:
        HTTPException: 503 when the orchestrator is not initialized; 400 when
            the pipeline rejected the input as invalid; 500 for any other
            pipeline failure.
    """
    if not orchestrator:
        raise HTTPException(status_code=503, detail="Service not initialized")

    logger.info(f"Creating proof from text ({len(request.content)} chars)")

    result = orchestrator.create_proof({
        "type": "text",
        "content": request.content,
    })

    if not result["success"]:
        # The orchestrator converts pipeline exceptions into a failure dict
        # carrying the exception class name in "error_type". Invalid input is
        # the caller's fault and must not be reported as a server error.
        if result.get("error_type") == "ValidationError":
            raise HTTPException(
                status_code=400,
                detail=result.get("error") or result.get("message", "Failed to create proof"),
            )
        raise HTTPException(
            status_code=500,
            detail=result.get("message", "Failed to create proof"),
        )

    proof = result["proof"]

    return ProofResponse(
        success=True,
        proof_id=proof.proof_id,
        hash=proof.content_hash,
        timestamp=proof.timestamp,
        message="Proof created successfully",
        assistant=result.get("assistant"),
    )
260
+
261
+
262
@app.post("/proof/create/file")
async def create_proof_from_file(
    file: UploadFile = File(...),
    metadata: Optional[str] = Form(None)
):
    """
    Create a cryptographic proof from an uploaded file.

    The whole upload is read into memory and passed, with its filename, to
    the orchestrator's proof-creation pipeline.

    Note: the ``metadata`` form field is accepted but not used by this
    endpoint.

    Returns:
        Dict with proof ID, hash, timestamp, content type, size, OCR status,
        a preview of the extracted text (first 200 characters followed by
        "..." when longer) and the optional AI explanation.

    Raises:
        HTTPException: 503 when the orchestrator is not initialized; 400 when
            the pipeline rejected the input as invalid; 500 for any other
            pipeline failure.
        ValidationError: when the uploaded file is empty.
    """
    if not orchestrator:
        raise HTTPException(status_code=503, detail="Service not initialized")

    logger.info(f"Creating proof from file: {file.filename} ({file.content_type})")

    # Read file content
    content = await file.read()

    if not content:
        raise ValidationError("File is empty")

    result = orchestrator.create_proof({
        "type": "file",
        "content": content,
        "filename": file.filename,
    })

    if not result["success"]:
        # The orchestrator reports the failing exception's class name in
        # "error_type"; rejected input is a client error, not a server error.
        if result.get("error_type") == "ValidationError":
            raise HTTPException(
                status_code=400,
                detail=result.get("error") or result.get("message", "Failed to create proof"),
            )
        raise HTTPException(
            status_code=500,
            detail=result.get("message", "Failed to create proof"),
        )

    proof = result["proof"]

    # Only a short preview of the extracted text is returned.
    extracted = proof.extracted_text
    if extracted and len(extracted) > 200:
        extracted = extracted[:200] + "..."

    return {
        "success": True,
        "proof_id": proof.proof_id,
        "hash": proof.content_hash,
        "timestamp": proof.timestamp,
        "content_type": proof.content_type,
        "size": proof.content_size,
        "ocr_status": proof.ocr_status,
        "extracted_text": extracted,
        "message": "Proof created successfully",
        "assistant": result.get("assistant"),
    }
316
+
317
+
318
@app.get("/proof/{proof_id}")
async def get_proof(proof_id: str):
    """
    Retrieve a stored proof by ID.

    Args:
        proof_id: Unique proof identifier

    Returns:
        Dict with ``success``, the proof serialized via ``to_dict()`` and a
        status message.

    Raises:
        HTTPException: 503 when the orchestrator is not initialized; 500 when
            retrieval failed for a reason other than a missing proof.
        ProofNotFoundError: when no proof exists for ``proof_id``.
    """
    if not orchestrator:
        raise HTTPException(status_code=503, detail="Service not initialized")

    logger.info(f"Retrieving proof: {proof_id}")

    result = orchestrator.get_proof(proof_id)

    if not result["success"]:
        error_type = result.get("error_type")
        # The orchestrator omits "error_type" when storage simply returned no
        # proof; a ProofNotFoundError caught inside it means the same thing.
        if error_type is None or error_type == "ProofNotFoundError":
            raise ProofNotFoundError(f"Proof not found: {proof_id}")
        # Any other failure is a backend problem and must not be reported as 404.
        raise HTTPException(
            status_code=500,
            detail=result.get("message", "Proof retrieval failed"),
        )

    proof = result["proof"]

    return {
        "success": True,
        "proof": proof.to_dict(),
        "message": "Proof retrieved successfully",
    }
346
+
347
+
348
@app.post("/proof/verify", response_model=VerificationResponse)
async def verify_proof(request: VerifyProofRequest):
    """
    Verify a proof against original content.

    The submitted content is encoded as UTF-8 and passed with the proof ID
    to the orchestrator, which reports whether it matches the stored proof.

    Returns:
        VerificationResponse with the validity flag, the verification
        message and the optional AI explanation.

    Raises:
        HTTPException: 503 when the orchestrator is not initialized; 400 when
            the input was rejected as invalid; 500 for any other failure.
        ProofNotFoundError: when the referenced proof does not exist.
    """
    if not orchestrator:
        raise HTTPException(status_code=503, detail="Service not initialized")

    logger.info(f"Verifying proof: {request.proof_id}")

    # Convert content to bytes
    content_bytes = request.content.encode('utf-8')

    result = orchestrator.verify_proof(request.proof_id, content_bytes)

    if not result["success"]:
        # The orchestrator reports the failing exception's class name in
        # "error_type"; map caller mistakes to 4xx instead of a blanket 500.
        error_type = result.get("error_type")
        if error_type == "ProofNotFoundError":
            raise ProofNotFoundError(
                result.get("error") or f"Proof not found: {request.proof_id}"
            )
        if error_type == "ValidationError":
            raise HTTPException(
                status_code=400,
                detail=result.get("error") or result.get("message", "Verification failed"),
            )
        raise HTTPException(
            status_code=500,
            detail=result.get("message", "Verification failed"),
        )

    verification_result = result["verification_result"]

    return VerificationResponse(
        success=True,
        is_valid=verification_result.is_valid,
        message=verification_result.message,
        assistant=result.get("assistant"),
    )
387
+
388
+
389
+ # ============================================================================
390
+ # AI ASSISTANT ENDPOINTS (OPTIONAL)
391
+ # ============================================================================
392
+
393
@app.post("/assistant/ask")
async def ask_assistant(request: AssistantRequest):
    """
    Forward a question to the optional AI assistant.

    The assistant is non-authoritative and never affects proof validity.
    When it is switched off, a ``success: False`` body is returned instead
    of an HTTP error.

    Returns:
        The orchestrator's answer dict, or a "not enabled" notice.

    Raises:
        HTTPException: 503 when the orchestrator is not initialized.
    """
    if not orchestrator:
        raise HTTPException(status_code=503, detail="Service not initialized")

    assistant_available = settings.AI_ENABLED and orchestrator.ai_sidecar.enabled
    if not assistant_available:
        return {
            "success": False,
            "message": "AI assistant is not enabled. Set AI_ENABLED=true and configure GEMINI_API_KEY."
        }

    # Only the first 50 characters of the question are logged.
    logger.info(f"AI assistant query: {request.question[:50]}...")

    return orchestrator.ask_assistant(request.question, request.proof_id)
418
+
419
+
420
+ # ============================================================================
421
+ # DOCUMENTATION ENDPOINTS
422
+ # ============================================================================
423
+
424
@app.get("/")
async def root():
    """Return a static description of the API: name, version and route index."""
    route_index = {
        "create_text_proof": "POST /proof/create/text",
        "create_file_proof": "POST /proof/create/file",
        "get_proof": "GET /proof/{proof_id}",
        "verify_proof": "POST /proof/verify",
        "ask_assistant": "POST /assistant/ask (optional)",
    }
    return {
        "name": "Proof-of-Existence API",
        "version": "0.4.0",
        "description": "Deterministic cryptographic proof generation and verification",
        "docs": "/docs",
        "health": "/health",
        "endpoints": route_index,
    }
443
+
444
+
445
+ # ============================================================================
446
+ # DEVELOPMENT SERVER
447
+ # ============================================================================
448
+
449
if __name__ == "__main__":
    import uvicorn

    # Development server options: listen on all interfaces, port 8000,
    # with auto-reload enabled.
    dev_server_options = {
        "host": "0.0.0.0",
        "port": 8000,
        "reload": True,
        "log_level": "info",
    }
    uvicorn.run("app:app", **dev_server_options)
config/settings.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration settings loaded from environment variables.
3
+ Never hardcode credentials or secrets.
4
+ """
5
+
6
+ import os
7
+ from typing import Optional
8
+
9
+
10
class Settings:
    """
    Environment-driven application configuration.

    Every value is read from the process environment once, when this class
    body executes, falling back to the default shown next to it.
    """

    # --- Storage (Supabase) ---
    SUPABASE_URL: str = os.environ.get("SUPABASE_URL", "")
    SUPABASE_KEY: str = os.environ.get("SUPABASE_KEY", "")
    SUPABASE_TABLE: str = os.environ.get("SUPABASE_TABLE", "proofs")

    # --- Hashing ---
    HASH_ALGORITHM: str = os.environ.get("HASH_ALGORITHM", "sha256")

    # --- Validation ---
    MAX_FILE_SIZE_MB: int = int(os.environ.get("MAX_FILE_SIZE_MB", "10"))
    ALLOWED_CONTENT_TYPES: list = [
        "text/plain",
        "application/pdf",
        "image/png",
        "image/jpeg",
        "application/json",
    ]

    # --- OCR ---
    # Only the literal "true" (any letter case) switches a flag on.
    OCR_ENABLED: bool = os.environ.get("OCR_ENABLED", "true").lower() == "true"
    OCR_LANGUAGE: str = os.environ.get("OCR_LANGUAGE", "eng")  # Tesseract language code

    # --- AI sidecar (non-authoritative) ---
    AI_ENABLED: bool = os.environ.get("AI_ENABLED", "false").lower() == "true"
    GEMINI_API_KEY: str = os.environ.get("GEMINI_API_KEY", "")
    GEMINI_MODEL: str = os.environ.get("GEMINI_MODEL", "gemini-1.5-flash")
    GEMINI_TIMEOUT: int = int(os.environ.get("GEMINI_TIMEOUT", "10"))  # seconds

    @classmethod
    def validate(cls) -> bool:
        """Return True when both the Supabase URL and key are non-empty."""
        return bool(cls.SUPABASE_URL and cls.SUPABASE_KEY)

    @classmethod
    def validate_ai(cls) -> bool:
        """Return True when AI is enabled and a Gemini API key is set."""
        return bool(cls.AI_ENABLED and cls.GEMINI_API_KEY)

    @classmethod
    def get_max_file_size_bytes(cls) -> int:
        """Return ``MAX_FILE_SIZE_MB`` converted to bytes."""
        bytes_per_mb = 1024 * 1024
        return cls.MAX_FILE_SIZE_MB * bytes_per_mb


# Global settings instance
settings = Settings()
core/agent_base.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Base agent interface for all system agents.
3
+ Ensures consistent behavior across all agent implementations.
4
+ """
5
+
6
+ from abc import ABC, abstractmethod
7
+ from typing import Any, Dict
8
+
9
+
10
class Agent(ABC):
    """
    Abstract parent of every agent in the system.

    Subclasses must provide ``execute``, which takes and returns a dictionary.
    """

    def __init__(self):
        # The agent's display name is its concrete class name.
        self.name = type(self).__name__

    @abstractmethod
    def execute(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Run the agent's core logic.

        Args:
            input_data: Structured input dictionary

        Returns:
            Structured output dictionary

        Raises:
            ProofSystemError: On execution failure
        """

    def __repr__(self) -> str:
        return "<" + self.name + ">"
core/errors.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Custom exception types for the proof-of-existence system.
3
+ All agents raise these typed errors for consistent error handling.
4
+ """
5
+
6
class ProofSystemError(Exception):
    """Root of the proof-system exception hierarchy."""


class ValidationError(ProofSystemError):
    """Input validation failed."""


class HashingError(ProofSystemError):
    """A hashing operation failed."""


class MetadataError(ProofSystemError):
    """Metadata generation failed."""


class StorageError(ProofSystemError):
    """A storage operation failed."""


class VerificationError(ProofSystemError):
    """Proof verification failed."""


class ProofNotFoundError(ProofSystemError):
    """The requested proof could not be found in storage."""


class OCRError(ProofSystemError):
    """Root of the OCR-related exceptions."""


class OCRNotApplicableError(OCRError):
    """OCR does not apply to the given content type."""


class OCRProcessingError(OCRError):
    """OCR processing failed."""


class OCRDependencyMissingError(OCRError):
    """Tesseract OCR is not installed or could not be located."""
core/orchestrator.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Orchestrator Module
3
+ Central controller that coordinates agent execution in a fixed sequence.
4
+ """
5
+
6
+ from typing import Dict, Any, Optional
7
+ import logging
8
+
9
+ from core.agent_base import Agent
10
+ from core.errors import ProofSystemError
11
+ from agents.input_validator import InputValidatorAgent
12
+ from agents.text_extraction_agent import TextExtractionAgent
13
+ from agents.hashing_agent import HashingAgent
14
+ from agents.metadata_agent import MetadataAgent
15
+ from agents.proof_builder import ProofBuilderAgent
16
+ from agents.storage_agent import SupabaseStorageAgent
17
+ from agents.verification_agent import VerificationAgent
18
+ from models.proof import Proof, VerificationResult
19
+ from sidecar.gemini_sidecar import GeminiSidecar
20
+
21
+
22
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class Orchestrator:
    """
    Central orchestrator that manages the proof generation pipeline.

    Pipeline methods never raise: every exception is converted into a
    ``{"success": False, ...}`` dictionary so callers can branch on the
    ``success`` flag (and ``error_type`` where present).
    """

    def __init__(self):
        """Instantiate every agent used by the pipeline, plus the AI sidecar."""
        self.input_validator = InputValidatorAgent()
        self.text_extraction_agent = TextExtractionAgent()
        self.hashing_agent = HashingAgent()
        self.metadata_agent = MetadataAgent()
        self.proof_builder = ProofBuilderAgent()
        self.storage_agent = SupabaseStorageAgent()
        self.verification_agent = VerificationAgent(self.storage_agent)

        # AI Sidecar (optional, non-authoritative)
        self.ai_sidecar = GeminiSidecar()

        logger.info("Orchestrator initialized with all agents")

    @staticmethod
    def _failure(
        error: Exception,
        message: str,
        error_type: Optional[str] = None
    ) -> Dict[str, Any]:
        """Build the failure dictionary shared by the pipeline methods.

        Args:
            error: The exception that aborted the operation.
            message: Generic, caller-facing description of the failure.
            error_type: Label to report; defaults to the exception class name.
        """
        return {
            "success": False,
            "error": str(error),
            "error_type": error_type or error.__class__.__name__,
            "message": message,
        }

    def create_proof(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the full proof creation pipeline.

        Args:
            input_data: {
                "type": "file" | "text",
                "content": bytes | str,
                "filename": str (optional)
            }

        Returns:
            On success: ``success``, ``proof_id``, ``proof``, ``message`` and,
            when the AI sidecar is enabled and answered, ``assistant``.
            On failure: ``success`` (False), ``error``, ``error_type``,
            ``message``.
        """
        try:
            logger.info("Starting proof creation pipeline")

            # Step 1: Validate input
            logger.info("Step 1/6: Validating input")
            validated_data = self.input_validator.execute(input_data)

            # Step 2: Extract text (OCR if applicable)
            logger.info("Step 2/6: Text extraction (OCR)")
            ocr_data = self.text_extraction_agent.execute(validated_data)

            # Step 3: Generate hash (ALWAYS on raw content, never OCR output)
            logger.info("Step 3/6: Generating hash from raw content")
            hashed_data = self.hashing_agent.execute(ocr_data)

            # Step 4: Generate metadata (includes OCR results)
            logger.info("Step 4/6: Generating metadata")
            metadata_data = self.metadata_agent.execute(hashed_data)

            # Step 5: Build proof
            logger.info("Step 5/6: Building proof object")
            proof_data = self.proof_builder.execute(metadata_data)

            # Step 6: Save proof (the return value is not needed here)
            logger.info("Step 6/6: Saving proof to storage")
            self.storage_agent.save_proof(proof_data["proof"])

            logger.info(f"Proof created successfully: {proof_data['proof_id']}")

            result = {
                "success": True,
                "proof_id": proof_data["proof_id"],
                "proof": proof_data["proof"],
                "message": "Proof created and stored successfully"
            }

            # Add AI explanation if available (optional, non-authoritative)
            if self.ai_sidecar.enabled:
                try:
                    assistant_response = self.ai_sidecar.explain_proof(
                        proof_data["proof"]
                    )
                    result["assistant"] = assistant_response.to_dict()
                    logger.info("AI explanation added to response")
                except Exception as e:
                    # Deliberate best-effort: AI failure does not affect core response
                    logger.warning(f"AI explanation failed (non-critical): {str(e)}")

            return result

        except ProofSystemError as e:
            logger.error(f"Proof creation failed: {str(e)}")
            return self._failure(e, "Proof creation failed")
        except Exception as e:
            # logger.exception records the traceback for unexpected errors.
            logger.exception(f"Unexpected error: {str(e)}")
            return self._failure(e, "An unexpected error occurred", "UnexpectedError")

    def ask_assistant(
        self,
        question: str,
        proof_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Ask the AI assistant a question about a proof.
        This is a separate, optional endpoint - not part of core flow.

        Args:
            question: User's question
            proof_id: Optional proof ID; when given, the proof is loaded from
                storage and passed to the assistant as context

        Returns:
            On success: ``success``, ``assistant`` (response dict), ``message``.
            When disabled: ``success`` (False) and ``message``.
            On failure: ``success`` (False), ``error``, ``message``.
        """
        if not self.ai_sidecar.enabled:
            return {
                "success": False,
                "message": "AI assistant is not enabled. Set AI_ENABLED=true and configure GEMINI_API_KEY."
            }

        try:
            logger.info(f"AI assistant query: {question[:50]}...")

            # Get proof if provided
            proof = None
            if proof_id:
                proof = self.storage_agent.get_proof(proof_id)

            assistant_response = self.ai_sidecar.answer_question(
                question,
                proof
            )

            return {
                "success": True,
                "assistant": assistant_response.to_dict(),
                "message": "Question answered"
            }

        except Exception as e:
            logger.exception(f"AI assistant query failed: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "message": "AI assistant query failed"
            }

    def verify_proof(self, proof_id: str, content: bytes) -> Dict[str, Any]:
        """
        Verify an existing proof.

        Args:
            proof_id: Unique proof identifier
            content: Original content to verify

        Returns:
            On success: ``success``, ``verification_result``, ``message`` and,
            when the AI sidecar is enabled and answered, ``assistant``.
            On failure: ``success`` (False), ``error``, ``error_type``,
            ``message``.
        """
        try:
            logger.info(f"Starting proof verification: {proof_id}")

            agent_output = self.verification_agent.execute({
                "proof_id": proof_id,
                "content": content
            })

            verification_result = agent_output["verification_result"]

            logger.info(f"Verification completed: {verification_result.message}")

            response = {
                "success": True,
                "verification_result": verification_result,
                "message": verification_result.message
            }

            # OPTIONAL: AI Sidecar explains verification
            if self.ai_sidecar.enabled:
                try:
                    # Get original proof for context
                    proof = self.storage_agent.get_proof(proof_id)
                    assistant_response = self.ai_sidecar.explain_verification(
                        verification_result,
                        proof
                    )
                    response["assistant"] = assistant_response.to_dict()
                    logger.info("AI verification explanation added")
                except Exception as e:
                    # Deliberate best-effort: AI failure does not affect core response
                    logger.warning(f"AI explanation failed (non-critical): {str(e)}")

            return response

        except ProofSystemError as e:
            logger.error(f"Verification failed: {str(e)}")
            return self._failure(e, "Verification failed")
        except Exception as e:
            logger.exception(f"Unexpected error: {str(e)}")
            return self._failure(e, "An unexpected error occurred", "UnexpectedError")

    def get_proof(self, proof_id: str) -> Dict[str, Any]:
        """
        Retrieve a proof from storage.

        Args:
            proof_id: Unique proof identifier

        Returns:
            On success: ``success``, ``proof``, ``message``.
            When storage returns nothing: ``success`` (False), ``proof``
            (None), ``message`` - note there is no ``error_type`` key.
            On failure: ``success`` (False), ``error``, ``error_type``,
            ``message``.
        """
        try:
            logger.info(f"Retrieving proof: {proof_id}")

            proof = self.storage_agent.get_proof(proof_id)

            if not proof:
                return {
                    "success": False,
                    "proof": None,
                    "message": f"Proof not found: {proof_id}"
                }

            return {
                "success": True,
                "proof": proof,
                "message": "Proof retrieved successfully"
            }

        except ProofSystemError as e:
            logger.error(f"Proof retrieval failed: {str(e)}")
            return self._failure(e, "Proof retrieval failed")
        except Exception as e:
            logger.exception(f"Unexpected error: {str(e)}")
            return self._failure(e, "An unexpected error occurred", "UnexpectedError")
deo.py ADDED
@@ -0,0 +1 @@
 
 
1
+ print("run")
docker-compose.yml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ api:
5
+ build:
6
+ context: .
7
+ dockerfile: Dockerfile
8
+ ports:
9
+ - "8000:8000"
10
+ environment:
11
+ # Core settings
12
+ - MAX_FILE_SIZE_MB=10
13
+ - HASH_ALGORITHM=sha256
14
+
15
+ # OCR settings (optional)
16
+ - OCR_ENABLED=true
17
+ - OCR_LANGUAGE=eng
18
+
19
+ # AI settings (optional - set your key)
20
+ - AI_ENABLED=${AI_ENABLED:-false}
21
+ - GEMINI_API_KEY=${GEMINI_API_KEY:-}
22
+ - GEMINI_MODEL=gemini-1.5-flash
23
+
24
+ # Storage settings (configure when ready)
25
+ - SUPABASE_URL=${SUPABASE_URL:-}
26
+ - SUPABASE_KEY=${SUPABASE_KEY:-}
27
+ - SUPABASE_TABLE=proofs
28
+
29
+ volumes:
30
+ # Mount .env file for local development
31
+ - ./.env:/app/.env:ro
32
+
33
+ restart: unless-stopped
34
+
35
+ healthcheck:
36
+ test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
37
+ interval: 30s
38
+ timeout: 10s
39
+ retries: 3
40
+ start_period: 5s
image_base64.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ��
main.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Main Entry Point
3
+ Demonstrates orchestrator usage with example workflows.
4
+ No business logic - just a thin interface layer.
5
+ """
6
+ from dotenv import load_dotenv
7
+ load_dotenv()
8
+
9
+ from core.orchestrator import Orchestrator
10
+
11
+
12
+ print(">>> MAIN FILE LOADED <<<")
13
+
14
+
15
def example_text_proof():
    """Demo: create a proof from a text string and print the outcome.

    Returns the orchestrator's result dictionary.
    """
    print("\n=== Example 1: Text Proof Creation ===")

    orchestrator = Orchestrator()

    payload = {
        "type": "text",
        "content": "This is a confidential document that needs timestamping.",
    }
    result = orchestrator.create_proof(payload)

    if not result["success"]:
        print(f"✗ Failed: {result['message']}")
        return result

    proof = result["proof"]
    print(f"✓ Proof created: {result['proof_id']}")
    print(f" Hash: {proof.content_hash}")
    print(f" Timestamp: {proof.timestamp}")

    # AI explanation (if available)
    if "assistant" in result:
        print("\n 🤖 AI Explanation:")
        print(f" {result['assistant']['response']}")

    return result
40
+
41
+
42
def example_file_proof():
    """Demo: create a proof from in-memory file bytes and print the outcome.

    Returns the orchestrator's result dictionary.
    """
    print("\n=== Example 2: File Proof Creation ===")

    orchestrator = Orchestrator()

    # Simulate file content
    payload = {
        "type": "file",
        "content": b"Binary file content here",
        "filename": "document.pdf",
    }
    result = orchestrator.create_proof(payload)

    if not result["success"]:
        print(f"✗ Failed: {result['message']}")
        return result

    proof = result["proof"]
    print(f"✓ Proof created: {result['proof_id']}")
    print(f" Hash: {proof.content_hash}")
    print(f" File: {proof.metadata['filename']}")
    print(f" OCR Status: {proof.ocr_status}")
    if proof.extracted_text:
        print(f" Extracted Text: {proof.extracted_text[:100]}...")

    return result
68
+
69
+
70
def example_image_ocr():
    """Example: Create proof from image with OCR.

    This demo only prints guidance: it has no real image to process, so it
    does not build an Orchestrator (the original instantiated one that was
    never used).
    """
    print("\n=== Example 5: Image Proof with OCR ===")

    # Note: In real usage, this would be actual image bytes
    # For demo, we'll just show the structure
    print("Note: This example requires actual image bytes with text.")
    print("Skipping OCR demo - install Tesseract and provide real image to test.")

    # Example structure:
    # with open("invoice.png", "rb") as f:
    #     image_bytes = f.read()
    #
    # result = Orchestrator().create_proof({
    #     "type": "file",
    #     "content": image_bytes,
    #     "filename": "invoice.png"
    # })
    #
    # if result["success"]:
    #     print(f"✓ Proof created with OCR")
    #     print(f" Extracted: {result['proof'].extracted_text}")

    print("✓ OCR integration ready for image inputs")
96
+
97
+
98
def example_ai_assistant():
    """Demo: create a proof, then ask the AI assistant three questions about it.

    Returns early with a notice when the sidecar is disabled or when the
    demo proof cannot be created.
    """
    print("\n=== Example 6: AI Assistant Q&A ===")

    orchestrator = Orchestrator()

    if not orchestrator.ai_sidecar.enabled:
        print("⚠️ AI assistant is disabled")
        print(" Enable with: AI_ENABLED=true GEMINI_API_KEY=your-key")
        return

    # Create a proof first
    creation = orchestrator.create_proof({
        "type": "text",
        "content": "Important contract signed on December 24, 2024",
    })
    if not creation["success"]:
        print("Failed to create proof for demo")
        return

    proof_id = creation["proof_id"]

    # Ask AI about the proof
    for question in (
        "What does this proof guarantee?",
        "How can I verify this proof later?",
        "What should I do with this proof ID?",
    ):
        print(f"\n Q: {question}")
        answer = orchestrator.ask_assistant(question, proof_id)

        if answer["success"]:
            print(f" 🤖 A: {answer['assistant']['response']}")
        else:
            print(f" ✗ {answer['message']}")
136
+
137
+
138
def example_verification():
    """Demo: verify a fresh proof with the original content, then with altered content."""
    print("\n=== Example 3: Proof Verification ===")

    orchestrator = Orchestrator()

    # First create a proof
    original_content = "Verify this content"
    creation = orchestrator.create_proof({
        "type": "text",
        "content": original_content,
    })
    if not creation["success"]:
        print("Failed to create proof for verification")
        return

    proof_id = creation["proof_id"]
    print(f"Created proof: {proof_id}")

    # Verify with correct content
    genuine = orchestrator.verify_proof(proof_id, original_content.encode('utf-8'))

    if genuine["success"]:
        outcome = genuine["verification_result"]
        label = "✓ VALID" if outcome.is_valid else "✗ INVALID"
        print(f"{label}: {outcome.message}")

        # AI explanation (if available)
        if "assistant" in genuine:
            print("\n 🤖 AI Explanation:")
            print(f" {genuine['assistant']['response']}")
    else:
        print(f"✗ Verification failed: {genuine['message']}")

    # Verify with tampered content
    print("\nAttempting verification with tampered content:")
    tampered = orchestrator.verify_proof(proof_id, b"Tampered content")

    if tampered["success"]:
        outcome = tampered["verification_result"]
        label = "✓ VALID" if outcome.is_valid else "✗ INVALID"
        print(f"{label}: {outcome.message}")
187
+
188
+
189
def example_retrieval():
    """Demo: store a proof, fetch it back by ID and print its details."""
    print("\n=== Example 4: Proof Retrieval ===")

    orchestrator = Orchestrator()

    # Create a proof first
    creation = orchestrator.create_proof({
        "type": "text",
        "content": "Retrieve this later",
    })
    if not creation["success"]:
        print("Failed to create proof")
        return

    # Retrieve it
    fetched = orchestrator.get_proof(creation["proof_id"])

    if not fetched["success"]:
        print(f"✗ Failed: {fetched['message']}")
        return

    proof = fetched["proof"]
    print(f"✓ Retrieved proof: {proof.proof_id}")
    print(f" Hash: {proof.content_hash}")
    print(f" Size: {proof.content_size} bytes")
    print(f" Created: {proof.timestamp}")
218
+
219
+
220
def main():
    """Run every example in order, stopping at the first one that raises."""
    banner = "=" * 60
    print(banner)
    print("PROOF-OF-EXISTENCE SYSTEM - Priority-3 MVP")
    print("Deterministic Core + OCR + AI Sidecar")
    print(banner)

    try:
        for run_example in (
            example_text_proof,
            example_file_proof,
            example_verification,
            example_retrieval,
            example_image_ocr,
            example_ai_assistant,
        ):
            run_example()

        print("\n" + banner)
        print("All examples completed successfully!")
        print(banner)

    except Exception as e:
        print(f"\n✗ Error running examples: {str(e)}")
241
+
242
+
243
+ if __name__ == "__main__":
244
+ main()
models/proof.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Proof data model representing a cryptographic proof of existence.
3
+ """
4
+
5
+ from dataclasses import dataclass, asdict
6
+ from typing import Optional
7
+ from datetime import datetime
8
+ import json
9
+
10
+
11
@dataclass
class Proof:
    """
    Record describing a proof of existence: content hash, metadata, and
    validation info, plus optional OCR results.

    NOTE(review): the dataclass is not declared frozen, so instances can be
    mutated after construction.
    """
    # Required fields (positional order is part of the constructor contract).
    proof_id: str
    content_hash: str
    hash_algorithm: str
    content_type: str
    content_size: int
    timestamp: str
    validation_status: str
    metadata: dict
    # Optional OCR-related fields.
    extracted_text: Optional[str] = None  # OCR output (if applicable)
    ocr_engine: Optional[str] = None      # Fixed: "tesseract"
    ocr_status: Optional[str] = None      # "success" | "skipped" | "failed"

    def to_dict(self) -> dict:
        """Return the proof's fields as a plain dictionary."""
        return asdict(self)

    def to_json(self) -> str:
        """Return the proof as a JSON string indented by two spaces."""
        payload = self.to_dict()
        return json.dumps(payload, indent=2)

    @classmethod
    def from_dict(cls, data: dict) -> 'Proof':
        """Build a proof from a dictionary whose keys are field names."""
        return cls(**data)

    @classmethod
    def from_json(cls, json_str: str) -> 'Proof':
        """Build a proof from a JSON string produced by ``to_json``."""
        parsed = json.loads(json_str)
        return cls.from_dict(parsed)
45
+
46
+
47
@dataclass
class VerificationResult:
    """
    Outcome of verifying content against a proof.
    """
    proof_id: str
    is_valid: bool
    original_hash: str   # hash recorded in the proof
    computed_hash: str   # hash computed during verification
    timestamp: str
    message: str

    def to_dict(self) -> dict:
        """Return the result's fields as a plain dictionary."""
        as_mapping = asdict(self)
        return as_mapping
requirements.txt ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ python-multipart
3
+
4
+ fastapi==0.109.0
5
+ uvicorn[standard]==0.27.0
6
+ python-dotenv==1.0.0
7
+ pydantic==2.5.3
8
+
9
+ # OCR dependencies (optional but recommended)
10
+ pytesseract==0.3.10
11
+ Pillow==10.2.0
12
+
13
+ # AI dependencies (optional)
14
+ google-generativeai==0.3.2
15
+
16
+ # Storage dependencies (when Supabase is connected)
17
+ # supabase==2.3.0
18
+
19
+ # Development dependencies (uncomment for local dev)
20
+ # pytest==7.4.3
21
+ # pytest-asyncio==0.23.3
22
+ # httpx==0.26.0
sidecar/gemini_sidecar.py ADDED
@@ -0,0 +1,412 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Gemini AI Sidecar
3
+ Non-authoritative assistant layer for proof explanation and guidance.
4
+
5
+ CRITICAL: This module is an OBSERVER, not a decision-maker.
6
+ - It explains proofs, never validates them
7
+ - It guides users, never creates proofs
8
+ - It fails gracefully, never blocks core operations
9
+ """
10
+
11
+ from typing import Dict, Any, Optional
12
+ import logging
13
+ from dataclasses import dataclass
14
+
15
+ from models.proof import Proof, VerificationResult
16
+ from config.settings import settings
17
+
18
+ # Lazy import - only load if AI is enabled
19
+ try:
20
+ import google.generativeai as genai
21
+ GEMINI_AVAILABLE = True
22
+ except ImportError:
23
+ GEMINI_AVAILABLE = False
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
@dataclass
class AssistantResponse:
    """
    Wrapper around AI-generated text that labels it as non-authoritative.
    The defaults mark every response as explanatory guidance, not fact.
    """
    provider: str = "gemini"
    role: str = "explanatory"
    response: str = ""
    confidence: str = "non-authoritative"
    error: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict; ``error`` is included only when set."""
        payload = {
            "provider": self.provider,
            "role": self.role,
            "response": self.response,
            "confidence": self.confidence
        }
        if not self.error:
            return payload
        payload["error"] = self.error
        return payload
51
+
52
+
53
class GeminiSidecar:
    """
    AI sidecar that produces explanations and guidance about proofs.

    This is NOT part of the deterministic core. Every public method returns
    an ``AssistantResponse``; when the AI is disabled, misconfigured, or the
    API call raises, a locally built fallback response is returned instead.
    """

    def __init__(self):
        """Start disabled, then try to set up the Gemini client."""
        self.enabled = False
        self.client = None
        self._initialize()

    def _initialize(self):
        """
        Set up the Gemini client.

        Leaves the sidecar disabled (and logs why) when AI is switched off in
        settings, the SDK could not be imported, the API key check fails, or
        client construction raises.
        """
        # Check if AI is enabled in config
        if not settings.AI_ENABLED:
            logger.info("AI Sidecar is disabled in configuration")
            return

        # Check if dependencies available
        if not GEMINI_AVAILABLE:
            logger.warning(
                "Gemini dependencies not available. "
                "Install with: pip install google-generativeai"
            )
            return

        # Check if API key is configured
        if not settings.validate_ai():
            logger.warning(
                "Gemini API key not configured. "
                "Set GEMINI_API_KEY environment variable to enable AI features."
            )
            return

        try:
            genai.configure(api_key=settings.GEMINI_API_KEY)
            self.client = genai.GenerativeModel(settings.GEMINI_MODEL)
            self.enabled = True
            logger.info("AI Sidecar initialized with model: %s", settings.GEMINI_MODEL)
        except Exception as e:
            # Best-effort: an unusable AI layer must never break the core.
            logger.error("Failed to initialize Gemini client: %s", e)
            self.client = None
            self.enabled = False

    def explain_proof(self, proof: Proof) -> AssistantResponse:
        """
        Explain what a proof means in plain language.

        Args:
            proof: Proof object to explain

        Returns:
            AssistantResponse with the AI explanation, or the fallback
            explanation when the AI is disabled or the call fails
        """
        if not self.enabled:
            return self._fallback_explain_proof(proof)

        try:
            prompt = self._build_proof_explanation_prompt(proof)
            return AssistantResponse(response=self._call_gemini(prompt))
        except Exception as e:
            logger.error("AI explanation failed: %s", e)
            return self._fallback_explain_proof(proof)

    def explain_verification(
        self,
        verification_result: VerificationResult,
        proof: Optional[Proof] = None
    ) -> AssistantResponse:
        """
        Explain what a verification result means.

        Args:
            verification_result: Verification result to explain
            proof: Optional original proof for context

        Returns:
            AssistantResponse with the AI explanation, or the fallback
            explanation when the AI is disabled or the call fails
        """
        if not self.enabled:
            return self._fallback_explain_verification(verification_result)

        try:
            prompt = self._build_verification_explanation_prompt(
                verification_result,
                proof
            )
            return AssistantResponse(response=self._call_gemini(prompt))
        except Exception as e:
            logger.error("AI verification explanation failed: %s", e)
            return self._fallback_explain_verification(verification_result)

    def summarize_content(self, extracted_text: Optional[str]) -> AssistantResponse:
        """
        Summarize extracted text content.

        Args:
            extracted_text: Text extracted from OCR or document; ``None``,
                empty, and whitespace-only values are all treated as
                "nothing to summarize"

        Returns:
            AssistantResponse with the AI summary, a fixed message when there
            is no text, or a content preview when the AI is disabled or the
            call fails
        """
        # Guard first, regardless of AI state: the fallback calls
        # ``text.split()`` and would raise on None, and a preview of empty
        # text is meaningless.
        if not extracted_text or not extracted_text.strip():
            return AssistantResponse(
                response="No text content available to summarize."
            )

        if not self.enabled:
            return self._fallback_summarize(extracted_text)

        try:
            prompt = self._build_summarization_prompt(extracted_text)
            return AssistantResponse(response=self._call_gemini(prompt))
        except Exception as e:
            logger.error("AI summarization failed: %s", e)
            return self._fallback_summarize(extracted_text)

    def answer_question(
        self,
        question: str,
        proof: Optional[Proof] = None,
        context: Optional[str] = None
    ) -> AssistantResponse:
        """
        Answer user questions about proofs or content.

        Args:
            question: User's question
            proof: Optional proof for context
            context: Optional additional context

        Returns:
            AssistantResponse with the AI answer, or a fixed "unavailable"
            message when the AI is disabled or the call fails
        """
        if not self.enabled:
            return self._fallback_answer()

        try:
            prompt = self._build_question_prompt(question, proof, context)
            return AssistantResponse(response=self._call_gemini(prompt))
        except Exception as e:
            logger.error("AI question answering failed: %s", e)
            return self._fallback_answer()

    def _call_gemini(self, prompt: str) -> str:
        """
        Send a prompt to Gemini and return the generated text, stripped.

        Errors are not handled here; they propagate to the public methods,
        which convert them into fallback responses.

        NOTE(review): no request timeout is configured on this call, so a
        hung request blocks the caller. Confirm whether the installed SDK
        version supports a per-request timeout before adding one.

        Args:
            prompt: Prompt to send to Gemini

        Returns:
            Generated response text

        Raises:
            RuntimeError: If the client was never initialized
            Exception: Whatever the SDK raises for a failed call
        """
        if not self.client:
            raise RuntimeError("Gemini client not initialized")

        response = self.client.generate_content(
            prompt,
            generation_config={
                "temperature": 0.7,
                "max_output_tokens": 500,
            }
        )

        return response.text.strip()

    def _build_proof_explanation_prompt(self, proof: Proof) -> str:
        """Build prompt for proof explanation (only a hash prefix is included)."""
        return f"""You are an assistant explaining a cryptographic proof of existence.

Proof Details:
- Proof ID: {proof.proof_id}
- Content Type: {proof.content_type}
- Content Size: {proof.content_size} bytes
- Hash: {proof.content_hash[:16]}...
- Timestamp: {proof.timestamp}
- OCR Status: {proof.ocr_status or 'not applicable'}

Explain in 2-3 sentences what this proof means and why it's useful.
Focus on:
1. What was proven (that content existed at a point in time)
2. How it works (cryptographic hash)
3. Why it's trustworthy (immutable)

Keep it simple and non-technical."""

    def _build_verification_explanation_prompt(
        self,
        result: VerificationResult,
        proof: Optional[Proof]
    ) -> str:
        """
        Build prompt for verification explanation.

        When a proof is supplied, its content type is appended as context.
        """
        status = "VALID" if result.is_valid else "INVALID"

        prompt = f"""You are an assistant explaining a proof verification result.

Verification Result:
- Status: {status}
- Original Hash: {result.original_hash[:16]}...
- Computed Hash: {result.computed_hash[:16]}...
- Match: {result.is_valid}

Explain in 2-3 sentences what this verification result means.
If valid: explain that the content hasn't changed.
If invalid: explain that the content has been modified."""

        # Only content_type is used here, so the presence of OCR text must
        # not decide whether this context line is added.
        if proof is not None:
            prompt += f"\n\nOriginal content was: {proof.content_type}"

        return prompt + "\n\nKeep it simple and actionable."

    def _build_summarization_prompt(self, text: str) -> str:
        """Build prompt for content summarization, capping the text at 2000 chars."""
        max_chars = 2000
        truncated = text[:max_chars]
        if len(text) > max_chars:
            truncated += "... (truncated)"

        return f"""Summarize the following text in 2-3 sentences:

{truncated}

Focus on:
- Main topic or purpose
- Key information
- Document type (if identifiable)

Keep it concise and factual."""

    def _build_question_prompt(
        self,
        question: str,
        proof: Optional[Proof],
        context: Optional[str]
    ) -> str:
        """Build prompt for question answering, adding proof/context when given."""
        prompt = f"""You are an assistant helping users understand cryptographic proofs.

User Question: {question}
"""

        if proof:
            prompt += f"""
Proof Context:
- Type: {proof.content_type}
- Size: {proof.content_size} bytes
- Created: {proof.timestamp}
- Hash: {proof.content_hash[:16]}...
"""
            if proof.extracted_text:
                prompt += f"- Extracted Text: {proof.extracted_text[:200]}...\n"

        if context:
            prompt += f"\nAdditional Context: {context}\n"

        prompt += """
Provide a helpful answer in 2-3 sentences.
Remember: You are explaining, not validating.
Focus on helping the user understand proofs and their uses."""

        return prompt

    # Fallback methods - used when AI is unavailable

    def _fallback_explain_proof(self, proof: Proof) -> AssistantResponse:
        """Fallback explanation built from the proof's own fields."""
        response = (
            f"This is a cryptographic proof that {proof.content_type} content "
            f"({proof.content_size} bytes) existed at {proof.timestamp}. "
            f"The proof uses a SHA-256 hash to ensure the content cannot be altered "
            f"without detection."
        )

        return AssistantResponse(
            response=response,
            error="AI assistant unavailable, using fallback explanation"
        )

    def _fallback_explain_verification(
        self,
        result: VerificationResult
    ) -> AssistantResponse:
        """Fallback verification explanation chosen by ``result.is_valid``."""
        if result.is_valid:
            response = (
                "Verification successful: The content matches the original proof. "
                "The cryptographic hash is identical, confirming no modifications "
                "have been made."
            )
        else:
            response = (
                "Verification failed: The content does not match the original proof. "
                "The cryptographic hashes differ, indicating the content has been "
                "modified since the proof was created."
            )

        return AssistantResponse(
            response=response,
            error="AI assistant unavailable, using fallback explanation"
        )

    def _fallback_summarize(self, text: str) -> AssistantResponse:
        """Fallback summary: word/char counts plus the first 150 characters."""
        word_count = len(text.split())
        char_count = len(text)

        preview = text[:150] + "..." if len(text) > 150 else text

        response = (
            f"Content preview ({word_count} words, {char_count} characters): "
            f"{preview}"
        )

        return AssistantResponse(
            response=response,
            error="AI assistant unavailable, showing content preview"
        )

    def _fallback_answer(self) -> AssistantResponse:
        """Fallback answer: a fixed message pointing to docs/support."""
        return AssistantResponse(
            response=(
                "AI assistant is currently unavailable. "
                "For questions about proofs, refer to the documentation or "
                "contact support."
            ),
            error="AI assistant unavailable"
        )
test.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Simple test script for uploading images to the proof system.
3
+ """
4
+ import requests
5
+ import base64
6
+ import json
7
+ from pathlib import Path
8
+
9
+ # API base URL
10
+ BASE_URL = "http://127.0.0.1:8000"
11
+
12
def test_text_proof():
    """Create a proof from a fixed text via POST /proof/create/text.

    Prints the outcome (and the assistant's response when one is present).

    Returns:
        The ``proof_id`` from the response, or None when the API answers
        with a non-200 status or the response carries no proof ID.
    """
    print("\n=== Testing Text Proof ===")

    response = requests.post(
        f"{BASE_URL}/proof/create/text",
        json={"content": "Hello, this is a test document!"}
    )

    # Same status handling as the image and verify helpers: report the
    # error body instead of printing a row of "None" values.
    if response.status_code != 200:
        print(f"❌ API Error {response.status_code}: {response.text}")
        return None

    result = response.json()
    print(f"Success: {result.get('success')}")
    print(f"Proof ID: {result.get('proof_id')}")
    print(f"Hash: {result.get('hash')}")

    if 'assistant' in result and result['assistant']:
        print(f"\n🤖 AI: {result['assistant']['response']}")

    return result.get('proof_id')
30
+
31
+
32
def test_image_proof(image_path):
    """Create a proof from an image via POST /proof/create/file.

    The file is sent as multipart/form-data, with a content type derived
    from the file name.

    Args:
        image_path: Path of the image file to upload.

    Returns:
        ``(proof_id, file_name)`` on success; None when the file is missing,
        the API answers with a non-200 status, or the response reports
        failure.
    """
    # Local import keeps this helper self-contained.
    import mimetypes

    print(f"\n=== Testing Image Proof: {image_path} ===")

    source = Path(image_path)

    # Check if file exists
    if not source.exists():
        print(f"❌ Error: File not found: {image_path}")
        print(f"Current directory: {Path.cwd()}")
        print(f"Available images: {list(Path('.').glob('*.png')) + list(Path('.').glob('*.jpg'))}")
        return None

    # Callers also pass .jpg/.jpeg files, so do not label everything as PNG.
    content_type = mimetypes.guess_type(source.name)[0] or "application/octet-stream"

    # Use FastAPI's multipart/form-data upload (not base64). The request is
    # made inside the ``with`` block so the handle is open while it is read.
    with open(image_path, "rb") as f:
        files = {"file": (source.name, f, content_type)}

        print(f"Uploading {source.name}...")

        response = requests.post(
            f"{BASE_URL}/proof/create/file",
            files=files
        )

    if response.status_code != 200:
        print(f"❌ API Error {response.status_code}: {response.text}")
        return None

    result = response.json()

    if not result.get('success'):
        print(f"❌ Failed: {result.get('message')}")
        if 'error' in result:
            print(f"Error: {result['error']}")
        return None

    print(f"✅ Success: {result.get('success')}")
    print(f"Proof ID: {result.get('proof_id')}")
    print(f"Hash: {result.get('hash')}")
    print(f"OCR Status: {result.get('ocr_status')}")

    if result.get('extracted_text'):
        text = result['extracted_text']
        preview = text[:100] + "..." if len(text) > 100 else text
        print(f"Extracted Text: {preview}")
    else:
        print("Extracted Text: (none - this is normal for photos)")

    if 'assistant' in result and result['assistant']:
        print(f"\n🤖 AI: {result['assistant']['response']}")

    return result.get('proof_id'), source.name
82
+
83
+
84
def test_verify_proof(proof_id, original_content, filename=None):
    """Verify text content against a proof via POST /proof/verify.

    When ``filename`` is given, nothing is sent: the function only prints a
    hint on how to verify a file proof and returns. Otherwise the content
    (bytes are decoded as UTF-8) is posted and the outcome is printed.
    """
    print(f"\n=== Testing Verification: {proof_id} ===")

    # File proofs are not verified here; just tell the user how to do it.
    if filename:
        print("⚠️ File verification requires the original file")
        print(f" API endpoint: POST /proof/verify")
        print(f" Body: {{'proof_id': '{proof_id}', 'content': 'original_content_as_string'}}")
        return

    # The endpoint takes the content as a string.
    content_str = (
        original_content.decode('utf-8')
        if isinstance(original_content, bytes)
        else original_content
    )

    payload = {
        "proof_id": proof_id,
        "content": content_str
    }
    response = requests.post(f"{BASE_URL}/proof/verify", json=payload)

    if response.status_code != 200:
        print(f"❌ API Error {response.status_code}: {response.text}")
        return

    result = response.json()
    print(f"Valid: {result.get('is_valid')}")
    print(f"Message: {result.get('message')}")

    assistant = result.get('assistant')
    if assistant:
        print(f"\n🤖 AI: {assistant['response']}")
119
+
120
+
121
def main():
    """Run the API checks: text proof, its verification, then an image proof.

    The image step uses the first of test1.png / test1.jpg / test1.jpeg that
    exists in the current directory, and is skipped with a hint otherwise.
    """
    rule = "=" * 60

    print(rule)
    print("PROOF SYSTEM - API TESTS")
    print(rule)

    # Test 1: Text proof
    text_proof_id = test_text_proof()

    # Test 2: Verify text proof (only if creation produced an ID)
    if text_proof_id:
        test_verify_proof(text_proof_id, "Hello, this is a test document!")

    # Test 3: Image proof - pick the first candidate present on disk
    candidates = ["test1.png", "test1.jpg", "test1.jpeg"]
    image_path = next((name for name in candidates if Path(name).exists()), None)

    if not image_path:
        print("\n⚠️ No test1 image found. Place test1.png in the backend folder.")
    else:
        outcome = test_image_proof(image_path)

        if outcome:
            image_proof_id, filename = outcome
            # Note: File verification needs special handling
            print(f"\n💡 To verify the image proof, use proof ID: {image_proof_id}")

    print("\n" + rule)
    print("Tests completed!")
    print(rule)
156
+
157
+
158
+ if __name__ == "__main__":
159
+ main()