/**
 * Numeric error codes.
 *
 * The members occupy two numeric ranges: 4001–4004 and 5001–5004.
 * NOTE(review): the split looks modelled on HTTP 4xx/5xx (caller-side vs.
 * internal failure) — confirm before relying on that reading.
 *
 * Because this is a numeric enum, TypeScript also emits the reverse mapping,
 * so `ErrorCode[4001]` evaluates to `'SOURCE_MISSING'` at runtime.
 */
export enum ErrorCode {
  // 4xxx range
  SOURCE_MISSING = 4001,
  INVALID_URL = 4002,
  PARSE_FAILED = 4003,
  NO_CONTENT = 4004,

  // 5xxx range
  CRAWL_FAILED = 5001,
  DIFF_FAILED = 5002,
  DB_SAVE_FAILED = 5003,
  EXPORT_STRUCT_FAILED = 5004,
}
|
|
/** Closed set of values accepted for a `source_type` field. */
export type SourceType =
  | 'peer_bank'
  | 'regulator'
  | 'sdk_vendor';

/**
 * Closed set of values accepted for a `crawl_frequency` field.
 * NOTE(review): `'4h'` presumably means "every four hours" — confirm with the
 * scheduler that consumes it.
 */
export type CrawlFrequency =
  | '4h'
  | 'daily'
  | 'weekly';

/** Closed set of values accepted for a `priority` field. */
export type Priority =
  | 'high'
  | 'medium'
  | 'low';
|
|
/**
 * Shape of one source registry entry.
 *
 * All fields are required except the two timestamps.
 */
export interface SourceRegistry {
  /** Identifier of the source. */
  source_id: string;
  /** Name of the source. */
  source_name: string;
  /** One of the `SourceType` literals. */
  source_type: SourceType;
  domain: string;
  entry_url: string;
  /**
   * Pattern string. The syntax (glob, regex, …) is not expressed by this
   * type — TODO confirm against whatever matches URLs with it.
   */
  url_pattern: string;
  /** Free-form string; not restricted to a literal union here. */
  parser_type: string;
  /** One of the `CrawlFrequency` literals. */
  crawl_frequency: CrawlFrequency;
  /** One of the `Priority` literals. */
  priority: Priority;
  enabled: boolean;
  topic_tags: string[];
  /** Optional; typed as a plain string (format not specified here). */
  created_at?: string;
  /** Optional; typed as a plain string (format not specified here). */
  updated_at?: string;
}
|
|
/** Closed set of values accepted for a `trigger_type` field. */
export type TriggerType =
  | 'schedule'
  | 'manual'
  | 'webhook';

/** Closed set of values accepted for a job `status` field. */
export type JobStatus =
  | 'queued'
  | 'running'
  | 'success'
  | 'failed';
|
|
/**
 * Shape of a crawl job record.
 *
 * The timestamps and both error fields are optional; everything else is
 * required.
 */
export interface CrawlJob {
  /** Identifier of the job. */
  job_id: string;
  /** Identifier of the source this job belongs to. */
  source_id: string;
  /** One of the `TriggerType` literals. */
  trigger_type: TriggerType;
  /** One of the `JobStatus` literals. */
  status: JobStatus;
  /** Optional; typed as a plain string (format not specified here). */
  started_at?: string;
  /** Optional; typed as a plain string (format not specified here). */
  ended_at?: string;
  /**
   * Optional error code, typed as `string`.
   * NOTE(review): the `ErrorCode` enum in this file is numeric, and
   * `GetJobResponse.error_code` additionally admits `null`; confirm which
   * representation is intended before tightening any of them.
   */
  error_code?: string;
  error_message?: string;
  retry_count: number;
}
|
|
/**
 * Shape of a raw snapshot record. Every field is required.
 */
export interface RawSnapshot {
  /** Identifier of the snapshot. */
  snapshot_id: string;
  /** Identifier of the source. */
  source_id: string;
  /** Identifier of the job. */
  job_id: string;
  /** Typed as a plain string (format not specified here). */
  fetched_at: string;
  content_type: string;
  raw_body: string;
  /**
   * Hash string; presumably computed over `raw_body` — the algorithm and
   * encoding are not visible from this type, confirm with the producer.
   */
  raw_hash: string;
  http_status: number;
  /** NOTE(review): presumably the URL after redirects — confirm. */
  final_url: string;
}
|
|
/** Closed set of values accepted for a `doc_status` field. */
export type DocStatus =
  | 'active'
  | 'archived';
|
|
/**
 * Shape of a normalized document record.
 *
 * Only `created_at` is optional.
 */
export interface NormalizedDocument {
  /** Identifier of the document. */
  doc_id: string;
  /** Identifier of the source. */
  source_id: string;
  /** Identifier of the snapshot. */
  snapshot_id: string;
  title: string;
  /** Typed as a plain string (date format not specified here). */
  version_date: string;
  /** Typed as a plain string (date format not specified here). */
  effective_date: string;
  normalized_text: string;
  /**
   * Hash string; presumably computed over `normalized_text` — algorithm and
   * encoding are not visible from this type, confirm with the producer.
   */
  normalized_hash: string;
  /** One of the `DocStatus` literals. */
  doc_status: DocStatus;
  /** Optional; typed as a plain string (format not specified here). */
  created_at?: string;
}
|
|
/** Closed set of values accepted for an `embedding_status` field. */
export type EmbeddingStatus =
  | 'pending'
  | 'ready'
  | 'failed';
|
|
/**
 * Shape of a clause chunk record.
 *
 * Only `created_at` is optional.
 */
export interface ClauseChunk {
  /** Identifier of the chunk. */
  chunk_id: string;
  /** Identifier of the document. */
  doc_id: string;
  /**
   * Path string; the separator and format are not expressed by this type —
   * TODO confirm with the chunker.
   */
  section_path: string;
  section_title: string;
  clause_text: string;
  topic_tags: string[];
  /** One of the `EmbeddingStatus` literals. */
  embedding_status: EmbeddingStatus;
  /**
   * Numeric ordering key.
   * NOTE(review): presumably the chunk's position within its document;
   * whether it is 0- or 1-based is not visible here.
   */
  chunk_order: number;
  /** Optional; typed as a plain string (format not specified here). */
  created_at?: string;
}
|
|
/** Closed set of values accepted for a `change_type` field. */
export type ChangeType =
  | 'added'
  | 'removed'
  | 'modified'
  | 'unchanged';

/**
 * Closed set of values accepted for an `impact_level` field.
 * Declared independently of `Priority`, although the literals coincide.
 */
export type ImpactLevel =
  | 'high'
  | 'medium'
  | 'low';
|
|
/**
 * Shape of a diff event record. Every field is required.
 */
export interface DiffEvent {
  /** Identifier of the event. */
  event_id: string;
  /** Identifier of the source. */
  source_id: string;
  /**
   * NOTE(review): `from_doc_id` / `to_doc_id` presumably hold the `doc_id`
   * of the earlier and later document being compared — confirm.
   */
  from_doc_id: string;
  to_doc_id: string;
  /** One of the `ChangeType` literals. */
  change_type: ChangeType;
  section_title: string;
  /**
   * Both excerpts are required strings.
   * NOTE(review): for `'added'` / `'removed'` changes one side presumably
   * carries an empty string — confirm with the producer.
   */
  old_excerpt: string;
  new_excerpt: string;
  topic_tags: string[];
  /** One of the `ImpactLevel` literals. */
  impact_level: ImpactLevel;
  /** Typed as a plain string (format not specified here). */
  detected_at: string;
}
|
|
| |
|
|
/**
 * Shape of a create-source request.
 *
 * Carries the same fields as `SourceRegistry` except `source_id`,
 * `created_at` and `updated_at`. Every field is required.
 */
export interface CreateSourceRequest {
  source_name: string;
  /** One of the `SourceType` literals. */
  source_type: SourceType;
  domain: string;
  entry_url: string;
  /** Pattern string; syntax is not expressed by this type — TODO confirm. */
  url_pattern: string;
  /** Free-form string; not restricted to a literal union here. */
  parser_type: string;
  /** One of the `CrawlFrequency` literals. */
  crawl_frequency: CrawlFrequency;
  /** One of the `Priority` literals. */
  priority: Priority;
  enabled: boolean;
  topic_tags: string[];
}
|
|
/**
 * Shape of a create-source response. Both fields are required.
 */
export interface CreateSourceResponse {
  /** Identifier of the source. */
  source_id: string;
  success: boolean;
}
|
|
/**
 * Shape of a get-sources query. Every field is optional, so an empty object
 * is a valid value of this type.
 */
export interface GetSourcesQuery {
  /** One of the `SourceType` literals, when present. */
  source_type?: SourceType;
  enabled?: boolean;
  /** One of the `Priority` literals, when present. */
  priority?: Priority;
}
|
|
/**
 * Shape of a get-sources response. Both fields are required.
 */
export interface GetSourcesResponse {
  /** Array of `SourceRegistry` entries. */
  items: SourceRegistry[];
  /**
   * Numeric count.
   * NOTE(review): whether this always equals `items.length` or can exceed it
   * (e.g. under pagination) is not visible from this type — confirm.
   */
  total: number;
}
|
|
/**
 * Shape of a create-jobs request. Both fields are required.
 */
export interface CreateJobsRequest {
  /** Array of source identifiers. */
  source_ids: string[];
  /** One of the `TriggerType` literals; a single value for the whole request. */
  trigger_type: TriggerType;
}
|
|
/**
 * Shape of a create-jobs response. Both fields are required.
 */
export interface CreateJobsResponse {
  /** Array of job identifiers. */
  job_ids: string[];
  /**
   * Typed as a plain `string`.
   * NOTE(review): this is not constrained to the `JobStatus` union declared
   * in this file — confirm whether that is deliberate.
   */
  status: string;
}
|
|
/**
 * Shape of a get-job response.
 *
 * Carries the same fields as `CrawlJob` except `trigger_type` and
 * `error_message`. The timestamps and `error_code` are optional.
 */
export interface GetJobResponse {
  /** Identifier of the job. */
  job_id: string;
  /** Identifier of the source. */
  source_id: string;
  /** One of the `JobStatus` literals. */
  status: JobStatus;
  /** Optional; typed as a plain string (format not specified here). */
  started_at?: string;
  /** Optional; typed as a plain string (format not specified here). */
  ended_at?: string;
  /**
   * Optional error code; unlike `CrawlJob.error_code`, an explicit `null`
   * is also allowed here.
   */
  error_code?: string | null;
  retry_count: number;
}
|
|