Spaces:
Sleeping
Sleeping
feat: Implement core gazette parsing, data extraction, and foundational AI auditor system components.
f0c339c | # Database Models for Kuwait Gazette Data | |
| from sqlalchemy import ( | |
| create_engine, Column, Integer, String, Text, Date, DateTime, | |
| ForeignKey, Float, Boolean, JSON, Table | |
| ) | |
| from sqlalchemy.ext.declarative import declarative_base | |
| from sqlalchemy.orm import relationship, sessionmaker | |
| from datetime import datetime, date | |
| from typing import Optional | |
| Base = declarative_base() | |
| class GazetteIssue(Base): | |
| """Represents a single gazette issue.""" | |
| __tablename__ = "gazette_issues" | |
| id = Column(Integer, primary_key=True) | |
| issue_number = Column(String(50), unique=True, index=True) # e.g., "1674" | |
| issue_date = Column(Date) | |
| hijri_date = Column(String(100)) # e.g., "1 Sha'ban 1445 AH" | |
| year = Column(Integer) # e.g., 70 (Seventieth Year) | |
| total_pages = Column(Integer) | |
| file_path = Column(String(500)) | |
| processed_at = Column(DateTime, default=datetime.utcnow) | |
| raw_content = Column(Text) # Store original markdown | |
| # Relationships | |
| # Relationships | |
| decrees = relationship("Decree", back_populates="issue", cascade="all, delete-orphan") | |
| bankruptcy_cases = relationship("BankruptcyCase", back_populates="issue", cascade="all, delete-orphan") | |
| tenders = relationship("Tender", back_populates="issue", cascade="all, delete-orphan") | |
| assemblies = relationship("GeneralAssembly", back_populates="issue", cascade="all, delete-orphan") | |
| agencies = relationship("CommercialAgency", back_populates="issue", cascade="all, delete-orphan") | |
| announcements = relationship("CompanyAnnouncement", back_populates="issue", cascade="all, delete-orphan") | |
| class Decree(Base): | |
| """Ministerial decrees and decisions.""" | |
| __tablename__ = "decrees" | |
| id = Column(Integer, primary_key=True) | |
| issue_id = Column(Integer, ForeignKey("gazette_issues.id")) | |
| decree_number = Column(String(100)) # e.g., "11 of 2024" | |
| decree_type = Column(String(100)) # e.g., "Ministerial", "Amiri" | |
| title = Column(Text) | |
| ministry = Column(String(200)) | |
| issue_date = Column(Date) | |
| summary = Column(Text) | |
| full_text = Column(Text) | |
| page_number = Column(Integer) | |
| # Extraction metadata | |
| char_start = Column(Integer) | |
| char_end = Column(Integer) | |
| extraction_confidence = Column(Float) | |
| issue = relationship("GazetteIssue", back_populates="decrees") | |
| class BankruptcyCase(Base): | |
| """Bankruptcy proceedings and decisions.""" | |
| __tablename__ = "bankruptcy_cases" | |
| id = Column(Integer, primary_key=True) | |
| issue_id = Column(Integer, ForeignKey("gazette_issues.id")) | |
| case_number = Column(String(100)) # e.g., "2023/62" | |
| automated_number = Column(String(100)) # e.g., "230739560" | |
| # Debtor information | |
| debtor_name = Column(String(300)) | |
| debtor_type = Column(String(50)) # "individual" or "company" | |
| # Creditor information (JSON array of creditors) | |
| creditors = Column(JSON) # [{name, type, amount}] | |
| # Case details | |
| judge_name = Column(String(200)) | |
| trustee_name = Column(String(200)) | |
| trustee_fees = Column(Float) | |
| cessation_date = Column(Date) | |
| decision_date = Column(Date) | |
| decision_type = Column(String(100)) # "opened", "rejected", etc. | |
| total_debt = Column(Float) | |
| full_text = Column(Text) | |
| page_number = Column(Integer) | |
| issue = relationship("GazetteIssue", back_populates="bankruptcy_cases") | |
| class Tender(Base): | |
| """Tender and practice announcements.""" | |
| __tablename__ = "tenders" | |
| id = Column(Integer, primary_key=True) | |
| issue_id = Column(Integer, ForeignKey("gazette_issues.id")) | |
| tender_number = Column(String(100)) # e.g., "12-2023/2024" | |
| tender_type = Column(String(50)) # "tender", "practice", "auction" | |
| # Details | |
| subject = Column(Text) | |
| ministry = Column(String(300)) | |
| requesting_entity = Column(String(300)) | |
| # Dates | |
| issuance_date = Column(Date) | |
| closing_date = Column(Date) | |
| postponed_to = Column(Date) | |
| # Financial | |
| document_fee = Column(Float) | |
| initial_guarantee = Column(Float) | |
| currency = Column(String(10), default="KWD") | |
| # Location | |
| location = Column(String(300)) | |
| is_postponed = Column(Boolean, default=False) | |
| full_text = Column(Text) | |
| page_number = Column(Integer) | |
| issue = relationship("GazetteIssue", back_populates="tenders") | |
| class GeneralAssembly(Base): | |
| """Extraordinary and Ordinary General Assembly minutes.""" | |
| __tablename__ = "general_assemblies" | |
| id = Column(Integer, primary_key=True) | |
| issue_id = Column(Integer, ForeignKey("gazette_issues.id")) | |
| # Company information | |
| company_name = Column(String(500)) | |
| company_name_arabic = Column(String(500)) | |
| company_type = Column(String(100)) # "K.S.C.C.", "K.S.C.P.", etc. | |
| # Meeting details | |
| assembly_type = Column(String(50)) # "Extraordinary", "Ordinary" | |
| meeting_date = Column(Date) | |
| meeting_time = Column(String(50)) | |
| meeting_location = Column(String(500)) | |
| attendance_percentage = Column(Float) | |
| total_shares_present = Column(Integer) | |
| # Attendees (JSON array) | |
| attendees = Column(JSON) # [{name, role, company}] | |
| # Resolutions (JSON array) | |
| resolutions = Column(JSON) # [{type, description, approved}] | |
| chairman = Column(String(200)) | |
| full_text = Column(Text) | |
| page_number = Column(Integer) | |
| issue = relationship("GazetteIssue", back_populates="assemblies") | |
| class CommercialAgency(Base): | |
| """Commercial agency registrations.""" | |
| __tablename__ = "commercial_agencies" | |
| id = Column(Integer, primary_key=True) | |
| issue_id = Column(Integer, ForeignKey("gazette_issues.id")) | |
| registration_number = Column(String(100)) # e.g., "00140/2024" | |
| # Agent (Kuwaiti company) | |
| agent_name = Column(String(500)) | |
| agent_name_arabic = Column(String(500)) | |
| # Principal (Foreign company) | |
| principal_name = Column(String(500)) | |
| principal_country = Column(String(100)) | |
| # Agency details | |
| activity_description = Column(Text) | |
| start_date = Column(Date) | |
| end_date = Column(Date) | |
| full_text = Column(Text) | |
| page_number = Column(Integer) | |
| issue = relationship("GazetteIssue", back_populates="agencies") | |
| class CompanyAnnouncement(Base): | |
| """Company announcements (dissolution, amendments, transfers).""" | |
| __tablename__ = "company_announcements" | |
| id = Column(Integer, primary_key=True) | |
| issue_id = Column(Integer, ForeignKey("gazette_issues.id")) | |
| # Company information | |
| company_name = Column(String(500)) | |
| company_name_arabic = Column(String(500)) | |
| legal_entity = Column(String(100)) # "LLC", "One-Person Company", etc. | |
| # Announcement details | |
| announcement_type = Column(String(100)) # "dissolution", "amendment", "transfer", "conversion" | |
| # For amendments | |
| amendment_details = Column(JSON) # {before, after, article} | |
| # For transfers | |
| license_number = Column(String(100)) | |
| assignor = Column(String(300)) | |
| assignee = Column(String(300)) | |
| trade_name = Column(String(500)) | |
| business_type = Column(String(300)) | |
| location = Column(String(300)) | |
| # For conversions | |
| from_entity_type = Column(String(100)) | |
| to_entity_type = Column(String(100)) | |
| full_text = Column(Text) | |
| page_number = Column(Integer) | |
| issue = relationship("GazetteIssue", back_populates="announcements") | |
| # Knowledge Graph Tables | |
| class Entity(Base): | |
| """Generic entity for knowledge graph.""" | |
| __tablename__ = "entities" | |
| id = Column(Integer, primary_key=True) | |
| entity_type = Column(String(100)) # "person", "company", "ministry", etc. | |
| name = Column(String(500), index=True) | |
| attributes = Column(JSON) | |
| source_table = Column(String(100)) # Which table this came from | |
| source_id = Column(Integer) # ID in the source table | |
| class Relationship(Base): | |
| """Relationships between entities.""" | |
| __tablename__ = "relationships" | |
| id = Column(Integer, primary_key=True) | |
| source_entity_id = Column(Integer, ForeignKey("entities.id")) | |
| target_entity_id = Column(Integer, ForeignKey("entities.id")) | |
| relationship_type = Column(String(100)) # "represents", "manages", "owns", etc. | |
| attributes = Column(JSON) | |
| source_entity = relationship("Entity", foreign_keys=[source_entity_id]) | |
| target_entity = relationship("Entity", foreign_keys=[target_entity_id]) | |