alda / models.py
jameszokah's picture
feat: Implement core gazette parsing, data extraction, and foundational AI auditor system components.
f0c339c
# Database Models for Kuwait Gazette Data
from sqlalchemy import (
create_engine, Column, Integer, String, Text, Date, DateTime,
ForeignKey, Float, Boolean, JSON, Table
)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship, sessionmaker
from datetime import datetime, date
from typing import Optional
Base = declarative_base()
class GazetteIssue(Base):
"""Represents a single gazette issue."""
__tablename__ = "gazette_issues"
id = Column(Integer, primary_key=True)
issue_number = Column(String(50), unique=True, index=True) # e.g., "1674"
issue_date = Column(Date)
hijri_date = Column(String(100)) # e.g., "1 Sha'ban 1445 AH"
year = Column(Integer) # e.g., 70 (Seventieth Year)
total_pages = Column(Integer)
file_path = Column(String(500))
processed_at = Column(DateTime, default=datetime.utcnow)
raw_content = Column(Text) # Store original markdown
# Relationships
# Relationships
decrees = relationship("Decree", back_populates="issue", cascade="all, delete-orphan")
bankruptcy_cases = relationship("BankruptcyCase", back_populates="issue", cascade="all, delete-orphan")
tenders = relationship("Tender", back_populates="issue", cascade="all, delete-orphan")
assemblies = relationship("GeneralAssembly", back_populates="issue", cascade="all, delete-orphan")
agencies = relationship("CommercialAgency", back_populates="issue", cascade="all, delete-orphan")
announcements = relationship("CompanyAnnouncement", back_populates="issue", cascade="all, delete-orphan")
class Decree(Base):
"""Ministerial decrees and decisions."""
__tablename__ = "decrees"
id = Column(Integer, primary_key=True)
issue_id = Column(Integer, ForeignKey("gazette_issues.id"))
decree_number = Column(String(100)) # e.g., "11 of 2024"
decree_type = Column(String(100)) # e.g., "Ministerial", "Amiri"
title = Column(Text)
ministry = Column(String(200))
issue_date = Column(Date)
summary = Column(Text)
full_text = Column(Text)
page_number = Column(Integer)
# Extraction metadata
char_start = Column(Integer)
char_end = Column(Integer)
extraction_confidence = Column(Float)
issue = relationship("GazetteIssue", back_populates="decrees")
class BankruptcyCase(Base):
"""Bankruptcy proceedings and decisions."""
__tablename__ = "bankruptcy_cases"
id = Column(Integer, primary_key=True)
issue_id = Column(Integer, ForeignKey("gazette_issues.id"))
case_number = Column(String(100)) # e.g., "2023/62"
automated_number = Column(String(100)) # e.g., "230739560"
# Debtor information
debtor_name = Column(String(300))
debtor_type = Column(String(50)) # "individual" or "company"
# Creditor information (JSON array of creditors)
creditors = Column(JSON) # [{name, type, amount}]
# Case details
judge_name = Column(String(200))
trustee_name = Column(String(200))
trustee_fees = Column(Float)
cessation_date = Column(Date)
decision_date = Column(Date)
decision_type = Column(String(100)) # "opened", "rejected", etc.
total_debt = Column(Float)
full_text = Column(Text)
page_number = Column(Integer)
issue = relationship("GazetteIssue", back_populates="bankruptcy_cases")
class Tender(Base):
"""Tender and practice announcements."""
__tablename__ = "tenders"
id = Column(Integer, primary_key=True)
issue_id = Column(Integer, ForeignKey("gazette_issues.id"))
tender_number = Column(String(100)) # e.g., "12-2023/2024"
tender_type = Column(String(50)) # "tender", "practice", "auction"
# Details
subject = Column(Text)
ministry = Column(String(300))
requesting_entity = Column(String(300))
# Dates
issuance_date = Column(Date)
closing_date = Column(Date)
postponed_to = Column(Date)
# Financial
document_fee = Column(Float)
initial_guarantee = Column(Float)
currency = Column(String(10), default="KWD")
# Location
location = Column(String(300))
is_postponed = Column(Boolean, default=False)
full_text = Column(Text)
page_number = Column(Integer)
issue = relationship("GazetteIssue", back_populates="tenders")
class GeneralAssembly(Base):
"""Extraordinary and Ordinary General Assembly minutes."""
__tablename__ = "general_assemblies"
id = Column(Integer, primary_key=True)
issue_id = Column(Integer, ForeignKey("gazette_issues.id"))
# Company information
company_name = Column(String(500))
company_name_arabic = Column(String(500))
company_type = Column(String(100)) # "K.S.C.C.", "K.S.C.P.", etc.
# Meeting details
assembly_type = Column(String(50)) # "Extraordinary", "Ordinary"
meeting_date = Column(Date)
meeting_time = Column(String(50))
meeting_location = Column(String(500))
attendance_percentage = Column(Float)
total_shares_present = Column(Integer)
# Attendees (JSON array)
attendees = Column(JSON) # [{name, role, company}]
# Resolutions (JSON array)
resolutions = Column(JSON) # [{type, description, approved}]
chairman = Column(String(200))
full_text = Column(Text)
page_number = Column(Integer)
issue = relationship("GazetteIssue", back_populates="assemblies")
class CommercialAgency(Base):
"""Commercial agency registrations."""
__tablename__ = "commercial_agencies"
id = Column(Integer, primary_key=True)
issue_id = Column(Integer, ForeignKey("gazette_issues.id"))
registration_number = Column(String(100)) # e.g., "00140/2024"
# Agent (Kuwaiti company)
agent_name = Column(String(500))
agent_name_arabic = Column(String(500))
# Principal (Foreign company)
principal_name = Column(String(500))
principal_country = Column(String(100))
# Agency details
activity_description = Column(Text)
start_date = Column(Date)
end_date = Column(Date)
full_text = Column(Text)
page_number = Column(Integer)
issue = relationship("GazetteIssue", back_populates="agencies")
class CompanyAnnouncement(Base):
"""Company announcements (dissolution, amendments, transfers)."""
__tablename__ = "company_announcements"
id = Column(Integer, primary_key=True)
issue_id = Column(Integer, ForeignKey("gazette_issues.id"))
# Company information
company_name = Column(String(500))
company_name_arabic = Column(String(500))
legal_entity = Column(String(100)) # "LLC", "One-Person Company", etc.
# Announcement details
announcement_type = Column(String(100)) # "dissolution", "amendment", "transfer", "conversion"
# For amendments
amendment_details = Column(JSON) # {before, after, article}
# For transfers
license_number = Column(String(100))
assignor = Column(String(300))
assignee = Column(String(300))
trade_name = Column(String(500))
business_type = Column(String(300))
location = Column(String(300))
# For conversions
from_entity_type = Column(String(100))
to_entity_type = Column(String(100))
full_text = Column(Text)
page_number = Column(Integer)
issue = relationship("GazetteIssue", back_populates="announcements")
# Knowledge Graph Tables
class Entity(Base):
"""Generic entity for knowledge graph."""
__tablename__ = "entities"
id = Column(Integer, primary_key=True)
entity_type = Column(String(100)) # "person", "company", "ministry", etc.
name = Column(String(500), index=True)
attributes = Column(JSON)
source_table = Column(String(100)) # Which table this came from
source_id = Column(Integer) # ID in the source table
class Relationship(Base):
"""Relationships between entities."""
__tablename__ = "relationships"
id = Column(Integer, primary_key=True)
source_entity_id = Column(Integer, ForeignKey("entities.id"))
target_entity_id = Column(Integer, ForeignKey("entities.id"))
relationship_type = Column(String(100)) # "represents", "manages", "owns", etc.
attributes = Column(JSON)
source_entity = relationship("Entity", foreign_keys=[source_entity_id])
target_entity = relationship("Entity", foreign_keys=[target_entity_id])