File size: 2,536 Bytes
353b9f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from datetime import datetime, timezone
from typing import Optional, List

from pydantic import BaseModel
from sqlalchemy import JSON
from sqlmodel import SQLModel, Field, Column, Text


def _utcnow_naive() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Produces the same kind of value as the deprecated ``datetime.utcnow()``
    (UTC wall-clock time with ``tzinfo`` stripped), so stored timestamps keep
    their existing naive-UTC form.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


class Paper(SQLModel, table=True):
    """Paper model for storing arXiv papers with AI analysis.

    Declared with ``table=True``, so it is mapped to a database table. The
    row holds three groups of fields: source metadata for the paper, the
    optional AI-generated analysis, and processing bookkeeping.
    """
    # Auto-assigned primary key; None until a value is assigned.
    id: Optional[int] = Field(default=None, primary_key=True)
    # Unique and indexed.
    arxiv_id: str = Field(unique=True, index=True)
    title: str
    # NOTE(review): stored as one string; the separator format is not
    # visible in this file -- confirm against the code that writes it.
    authors: str
    # Stored in a Text column.
    abstract: str = Field(sa_column=Column(Text))
    # NOTE(review): stored as one string; encoding of multiple categories is
    # not visible here -- confirm against the writer.
    categories: str
    published: datetime
    updated: datetime
    pdf_url: str
    thumbnail_url: Optional[str] = None

    # AI-generated analysis fields (all default to None)
    summary_zh: Optional[str] = Field(default=None, sa_column=Column(Text))
    # NOTE(review): bounds are declared as 0..10; SQLModel table models are
    # documented as skipping validation on construction, so confirm where
    # (if anywhere) this range is actually enforced.
    relevance_score: Optional[float] = Field(default=None, ge=0, le=10)
    relevance_reason: Optional[str] = Field(default=None, sa_column=Column(Text))
    heuristic_idea: Optional[str] = Field(default=None, sa_column=Column(Text))

    # Metadata
    is_processed: bool = Field(default=False)
    # Indexed; new rows start as "pending".
    processing_status: str = Field(default="pending", index=True)
    # Naive UTC timestamp taken when the instance is created.
    created_at: datetime = Field(default_factory=_utcnow_naive)
    processed_at: Optional[datetime] = None


class PaperCreate(SQLModel):
    """Schema for creating a new paper.

    Declares only the source-metadata fields, with the same names and types
    as the corresponding fields on ``Paper``. All eight fields are required
    (none has a default). Unlike ``Paper`` it is not declared with
    ``table=True``, and it has no ``id``, ``thumbnail_url``, AI-analysis, or
    processing-status fields.
    """
    arxiv_id: str
    title: str
    # NOTE(review): a single string; author separator format is not shown in
    # this file -- confirm against the caller.
    authors: str
    abstract: str
    # NOTE(review): a single string; how multiple categories are encoded is
    # not shown in this file -- confirm against the caller.
    categories: str
    published: datetime
    updated: datetime
    pdf_url: str


class PaperRead(SQLModel):
    """Schema for reading paper data.

    Exposes every field declared on ``Paper``. It is not declared with
    ``table=True``. All ``Optional`` fields default to ``None`` (matching the
    defaults on ``Paper``), so a paper whose analysis fields are absent can
    still be represented; previously only ``thumbnail_url`` had a default.
    """
    id: int
    arxiv_id: str
    title: str
    authors: str
    abstract: str
    categories: str
    published: datetime
    updated: datetime
    pdf_url: str
    thumbnail_url: Optional[str] = None

    # AI-generated analysis fields; None when not set.
    summary_zh: Optional[str] = None
    relevance_score: Optional[float] = None
    relevance_reason: Optional[str] = None
    heuristic_idea: Optional[str] = None

    # Processing bookkeeping.
    is_processed: bool
    processing_status: str
    created_at: datetime
    processed_at: Optional[datetime] = None


class LLMAnalysis(BaseModel):
    """Schema for LLM analysis response.

    A plain pydantic ``BaseModel`` with four required fields. The field
    names match the AI-generated analysis fields on ``Paper``, where they
    are ``Optional``; here none is optional and none has a default.
    """
    # NOTE(review): presumably a Chinese-language summary, judging by the
    # ``_zh`` suffix -- confirm against the prompt that produces it.
    summary_zh: str
    # NOTE(review): no range constraint is declared here, whereas
    # ``Paper.relevance_score`` declares ge=0, le=10 -- confirm whether
    # out-of-range scores should be rejected at this boundary.
    relevance_score: float
    relevance_reason: str
    heuristic_idea: str


# Default arXiv categories; shared by the model-level and column-level defaults.
_DEFAULT_ARXIV_CATEGORIES: List[str] = ["cs.CV", "cs.LG"]


class AppSettings(SQLModel, table=True):
    """Application settings stored in DB.

    Declared with ``table=True``, so it is mapped to a database table. The
    primary key defaults to 1.
    NOTE(review): presumably a single-row settings table -- confirm.

    Every field has a Python-level default, so a freshly constructed
    ``AppSettings()`` is fully populated before it is flushed. The
    SQLAlchemy column defaults from the original declaration are kept, so
    inserts that bypass the model defaults still receive the same values.
    """
    id: int = Field(default=1, primary_key=True)
    # Stored in a Text column; empty string by default.
    research_focus: str = Field(default="", sa_column=Column(Text, default=""))
    # Stored in a JSON column; default_factory gives each instance its own list.
    focus_keywords: List[str] = Field(default_factory=list, sa_column=Column(JSON))
    # Stored in a Text column; empty string by default.
    system_prompt: str = Field(default="", sa_column=Column(Text, default=""))
    # Stored in a JSON column; each instance gets a fresh copy of the defaults.
    arxiv_categories: List[str] = Field(
        default_factory=lambda: list(_DEFAULT_ARXIV_CATEGORIES),
        sa_column=Column(JSON, default=_DEFAULT_ARXIV_CATEGORIES),
    )