Spaces:
Sleeping
Sleeping
| from dataclasses import dataclass, field | |
| from typing import Optional | |
| from utils.snowflake_id import snowflake_id_str | |
| class BaseBioDocument: | |
| """ | |
| 生物医学文档基础类 | |
| 包含所有搜索类型共有的字段 | |
| """ | |
| bio_id: Optional[str] = field(default_factory=snowflake_id_str) | |
| title: Optional[str] = None | |
| text: Optional[str] = None | |
| source: Optional[str] = None | |
| source_id: Optional[str] = None | |
| class PubMedDocument(BaseBioDocument): | |
| """ | |
| PubMed学术文献文档 | |
| 包含学术文献特有的字段 | |
| """ | |
| abstract: Optional[str] = None | |
| authors: Optional[str] = None | |
| doi: Optional[str] = None | |
| journal: Optional[str] = None | |
| pub_date: Optional[str] = None | |
| if_score: Optional[float] = None | |
| url: Optional[str] = None | |
| def __post_init__(self): | |
| if self.source is None: | |
| self.source = "pubmed" | |
| class PersonalDocument(BaseBioDocument): | |
| """ | |
| 个人向量搜索文档 | |
| 包含个人文档特有的字段 | |
| """ | |
| if_score: Optional[float] = None | |
| doc_id: Optional[str] = None | |
| index: Optional[int] = 0 | |
| user_id: Optional[str] = None | |
| file_name: Optional[str] = None | |
| def __post_init__(self): | |
| if self.source is None: | |
| self.source = "personal_vector" | |
| class WebDocument(BaseBioDocument): | |
| """ | |
| Web搜索文档 | |
| 包含网页内容特有的字段 | |
| """ | |
| url: Optional[str] = None | |
| description: Optional[str] = None | |
| def __post_init__(self): | |
| if self.source is None: | |
| self.source = "web" | |
| # 为了保持向后兼容,保留原有的BioDocument类 | |
| class BioDocument(BaseBioDocument): | |
| """ | |
| 生物医学文档(向后兼容) | |
| 包含所有可能的字段,但建议使用专门的文档类型 | |
| """ | |
| abstract: Optional[str] = None | |
| authors: Optional[str] = None | |
| doi: Optional[str] = None | |
| journal: Optional[str] = None | |
| pub_date: Optional[str] = None | |
| if_score: Optional[float] = None | |
| url: Optional[str] = None | |
| doc_id: Optional[str] = None | |
| # 工厂函数,根据source类型创建相应的文档对象 | |
| def create_bio_document(source: str, **kwargs) -> BaseBioDocument: | |
| """ | |
| 根据source类型创建相应的文档对象 | |
| Args: | |
| source: 文档来源类型 ("pubmed", "personal_vector", "web") | |
| **kwargs: 文档字段 | |
| Returns: | |
| 相应的文档对象 | |
| """ | |
| if source == "pubmed": | |
| return PubMedDocument(**kwargs) | |
| elif source == "personal_vector": | |
| return PersonalDocument(**kwargs) | |
| elif source == "web": | |
| return WebDocument(**kwargs) | |
| else: | |
| # 默认使用通用BioDocument | |
| return BioDocument(**kwargs) | |