# field_semantic_mapping / schema_definitions.py
# Tanishq Salkar
# initial visual mapping code added to hf
# db81e28
"""
Schema definitions for PDF field extraction.
This module loads the real estate alias IDs from a JSON file that is exported
from the TypeScript source (real-estate-property-alias-ids.ts) using the
export-alias-ids.js script.
To update the schema:
1. Modify the TypeScript source file
2. Run: node scripts/export-alias-ids.js
3. The JSON file will be updated automatically
"""
import json
import os
# Field-type labels that the Vision model can detect.
# Order is kept exactly as originally declared.
VALID_FIELD_TYPES = [
    "checkbox", "date", "dollar", "email",
    "fullName", "initial", "longText", "number",
    "phone", "shortText", "signature", "usAddress",
    "ssn", "iban",
]
def load_real_estate_schema():
    """
    Load the real estate alias-ID schema from the exported JSON file.

    The file ``real_estate_alias_ids.json`` is looked up in the same
    directory as this module. Its expected layout is a JSON object of the
    form ``{key: {"description": ..., "type": ..., "name": ...}}``.

    Returns:
        dict: A mapping ``{key: {"desc": ..., "type": ..., "name": ...}}``.
        Fields missing from an entry default to ``""`` for ``desc``,
        ``"shortText"`` for ``type`` and the entry's own key for ``name``.

        An empty dict is returned (after printing a message) when the file
        is missing, cannot be read, is not valid JSON, or does not contain
        a JSON object at the top level. Entries whose value is not a JSON
        object are skipped with a warning so that one malformed entry does
        not discard the rest of the schema.
    """
    json_path = os.path.join(os.path.dirname(__file__), "real_estate_alias_ids.json")

    try:
        # Read as UTF-8 explicitly (the standard JSON encoding, RFC 8259)
        # instead of relying on the platform's default text encoding.
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Warning: {json_path} not found. Run 'node scripts/export-alias-ids.js' to generate it.")
        return {}
    except (OSError, ValueError) as e:
        # OSError: unreadable file; ValueError: invalid JSON or bad encoding
        # (json.JSONDecodeError and UnicodeDecodeError both derive from it).
        print(f"Error loading real estate schema: {e}")
        return {}

    if not isinstance(data, dict):
        print(
            "Error loading real estate schema: expected a JSON object at the "
            f"top level, got {type(data).__name__}"
        )
        return {}

    # Transform the JSON format to the format used by the rest of the code:
    #   JSON has:   { key: { description, type, name } }
    #   We produce: { key: { desc, type, name } }
    schema = {}
    for key, value in data.items():
        if not isinstance(value, dict):
            print(
                f"Warning: skipping alias ID {key!r}: expected an object, "
                f"got {type(value).__name__}"
            )
            continue
        schema[key] = {
            "desc": value.get("description", ""),
            "type": value.get("type", "shortText"),
            "name": value.get("name", key),
        }
    return schema
# Populate the module-level schema once, at import time.
REAL_ESTATE_SCHEMA_MAP = load_real_estate_schema()

# Report the outcome: either the number of alias IDs loaded, or a warning
# that nothing was loaded.
print(
    f"Loaded {len(REAL_ESTATE_SCHEMA_MAP)} alias IDs from real_estate_alias_ids.json"
    if REAL_ESTATE_SCHEMA_MAP
    else "Warning: No alias IDs loaded. Schema mapping will create dynamic fields for all detections."
)