"""
Pydantic models for the content-extraction and Reader/Summarizer agents.

Defines the data structures for documents, extraction requests and results,
and document summaries.
"""

from datetime import datetime, timezone
from enum import Enum
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field, HttpUrl


class DocumentType(str, Enum):
    """Supported document types.

    Members also subclass ``str``, so each member compares equal to its raw
    string value (for example ``DocumentType.NEWS == "news"``).
    """

    ARTICLE = "article"
    BLOG_POST = "blog_post"
    ACADEMIC_PAPER = "academic_paper"
    NEWS = "news"
    REPORT = "report"
    OTHER = "other"


class Document(BaseModel):
    """A document to be analysed.

    ``title``, ``url`` and ``content`` are required; every other field has a
    default value.
    """

    # --- Required fields ---------------------------------------------------
    title: str = Field(..., description="Titre du document")
    url: HttpUrl = Field(..., description="URL source du document")
    content: str = Field(..., description="Contenu textuel complet du document")

    # --- Optional metadata -------------------------------------------------
    doc_type: DocumentType = Field(
        default=DocumentType.ARTICLE,
        description="Type de document",
    )
    author: Optional[str] = Field(default=None, description="Auteur du document")
    published_date: Optional[datetime] = Field(
        default=None,
        description="Date de publication",
    )
    source: Optional[str] = Field(
        default=None,
        description="Site ou publication source",
    )
    # Must be >= 0. No validator in this class derives it from ``content``,
    # so it stays at 0 unless the caller supplies a value.
    word_count: int = Field(
        default=0,
        ge=0,
        description="Nombre de mots dans le document",
    )
    language: str = Field(
        default="fr",
        description="Langue du document (code ISO)",
    )

    class Config:
        # Example payload exposed through ``json_schema_extra``.
        json_schema_extra = {
            "example": {
                "title": "L'impact de l'IA sur le futur du travail",
                "url": "https://example.com/article-ia-travail",
                "content": "L'intelligence artificielle transforme rapidement...",
                "doc_type": "article",
                "author": "Marie Martin",
                "published_date": "2024-01-15T09:30:00Z",
                "source": "TechMag",
                "word_count": 1500,
                "language": "fr",
            }
        }


class ExtractionInput(BaseModel):
    """Input payload for the Content Extractor agent."""

    # ``min_length=1`` rejects an empty list; it is the Pydantic v2 spelling
    # of the list-size constraint (the v1 name ``min_items`` is deprecated).
    urls: List[str] = Field(
        ...,
        description="Liste des URLs à extraire",
        min_length=1,
    )
    # Both option mappings default to a fresh empty dict per instance; the
    # ``Optional`` annotation also lets callers pass ``None`` explicitly.
    content_filters: Optional[Dict[str, Any]] = Field(
        default_factory=dict,
        description="Filtres à appliquer au contenu extrait",
    )
    extraction_options: Optional[Dict[str, Any]] = Field(
        default_factory=dict,
        description="Options d'extraction spécifiques",
    )

    class Config:
        # Example payload exposed through ``json_schema_extra``.
        json_schema_extra = {
            "example": {
                "urls": [
                    "https://example.com/article1",
                    "https://example.com/article2.pdf",
                ],
                "content_filters": {
                    "min_content_length": 100,
                    "max_content_length": 10000,
                    "language": "fr",
                    "required_keywords": ["intelligence artificielle"],
                },
                "extraction_options": {
                    "timeout": 30,
                    "max_retries": 2,
                },
            }
        }


class ExtractionResult(BaseModel):
    """Outcome of a content-extraction run.

    The three counters are each validated to be non-negative; this model does
    not check that they are consistent with one another or with the lengths
    of ``documents`` and ``failed_urls``.
    """

    documents: List[Document] = Field(
        ...,
        description="Documents extraits avec succès",
    )

    # --- Counters (each >= 0) ----------------------------------------------
    total_urls: int = Field(
        ...,
        ge=0,
        description="Nombre total d'URLs traitées",
    )
    successful_extractions: int = Field(
        ...,
        ge=0,
        description="Nombre d'extractions réussies",
    )
    failed_extractions: int = Field(
        ...,
        ge=0,
        description="Nombre d'extractions échouées",
    )
    failed_urls: List[str] = Field(
        default_factory=list,
        description="URLs qui ont échoué lors de l'extraction",
    )

    # --- Timing and free-form statistics -----------------------------------
    execution_time: float = Field(
        ...,
        ge=0,
        description="Temps d'exécution en secondes",
    )
    extraction_stats: Dict[str, Any] = Field(
        default_factory=dict,
        description="Statistiques détaillées de l'extraction",
    )

    class Config:
        # Example payload exposed through ``json_schema_extra``.
        json_schema_extra = {
            "example": {
                "documents": [],
                "total_urls": 5,
                "successful_extractions": 4,
                "failed_extractions": 1,
                "execution_time": 12.5,
                "extraction_stats": {
                    "total_words": 5000,
                    "average_words_per_doc": 1250,
                    "doc_types": {"article": 3, "pdf": 1},
                    "languages": {"fr": 4},
                },
            }
        }


class KeyPoint(BaseModel):
    """A key point extracted from a document."""

    title: str = Field(..., description="Titre du point clé")
    content: str = Field(..., description="Contenu détaillé du point")
    # Importance score, constrained to the closed interval [0, 1].
    importance: float = Field(
        ...,
        ge=0,
        le=1,
        description="Score d'importance (0-1)",
    )
    category: Optional[str] = Field(
        default=None,
        description="Catégorie du point clé",
    )

    class Config:
        # Example payload exposed through ``json_schema_extra``.
        json_schema_extra = {
            "example": {
                "title": "Automatisation des tâches répétitives",
                "content": "L'IA permet d'automatiser 30% des tâches actuelles...",
                "importance": 0.9,
                "category": "automatisation",
            }
        }


class Citation(BaseModel):
    """A notable quotation extracted from a document.

    Only ``text`` is required; the remaining fields default to ``None``.
    """

    text: str = Field(..., description="Texte de la citation")
    author: Optional[str] = Field(
        default=None,
        description="Auteur de la citation",
    )
    context: Optional[str] = Field(
        default=None,
        description="Contexte de la citation",
    )
    # Page number, when applicable; no range constraint is declared here.
    page_number: Optional[int] = Field(
        default=None,
        description="Numéro de page (si applicable)",
    )

    class Config:
        # Example payload exposed through ``json_schema_extra``.
        json_schema_extra = {
            "example": {
                "text": "L'IA ne remplacera pas les humains, elle augmentera leurs capacités",
                "author": "Dr. Jean Dupont",
                "context": "Conclusion de l'étude sur l'IA et l'emploi",
                "page_number": None,
            }
        }


class DocumentSummary(BaseModel):
    """Summary of a single document.

    The identity fields and the two summary texts are required; the extracted
    elements, analysis fields and processing metadata all have defaults.
    """

    # --- Identity of the source document -----------------------------------
    document_id: str = Field(..., description="Identifiant unique du document")
    title: str = Field(..., description="Titre du document original")
    url: HttpUrl = Field(..., description="URL du document original")

    # --- Summaries ---------------------------------------------------------
    executive_summary: str = Field(
        ...,
        description="Résumé exécutif (2-3 phrases)",
    )
    detailed_summary: str = Field(
        ...,
        description="Résumé détaillé (1-2 paragraphes)",
    )

    # --- Key elements ------------------------------------------------------
    key_points: List[KeyPoint] = Field(
        default_factory=list,
        description="Points clés extraits",
    )
    main_arguments: List[str] = Field(
        default_factory=list,
        description="Arguments principaux",
    )

    # --- Citations and figures ---------------------------------------------
    important_citations: List[Citation] = Field(
        default_factory=list,
        description="Citations importantes",
    )
    statistics: List[str] = Field(
        default_factory=list,
        description="Statistiques mentionnées",
    )

    # --- Analysis ----------------------------------------------------------
    # Free-form string; the description suggests positive/neutral/negative
    # but no constraint enforces those values.
    sentiment: Optional[str] = Field(
        default=None,
        description="Sentiment général (positif/neutre/négatif)",
    )
    bias_assessment: Optional[str] = Field(
        default=None,
        description="Évaluation des biais potentiels",
    )
    # Credibility score, constrained to [0, 1] when provided.
    credibility_score: Optional[float] = Field(
        default=None,
        ge=0,
        le=1,
        description="Score de crédibilité (0-1)",
    )

    # --- Processing metadata -----------------------------------------------
    # Timezone-aware UTC default, so generated timestamps can be compared
    # with aware values such as the "Z"-suffixed one in the example below
    # (comparing naive and aware datetimes raises ``TypeError``).
    processed_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        description="Horodatage du traitement",
    )
    processing_time: float = Field(
        default=0.0,
        ge=0,
        description="Temps de traitement en secondes",
    )

    class Config:
        # Example payload exposed through ``json_schema_extra``.
        json_schema_extra = {
            "example": {
                "document_id": "doc_123",
                "title": "L'impact de l'IA sur le futur du travail",
                "url": "https://example.com/article",
                "executive_summary": "L'IA transformera 60% des emplois d'ici 2030...",
                "detailed_summary": "Cette étude approfondie examine...",
                "key_points": [],
                "sentiment": "neutre",
                "credibility_score": 0.8,
                "processed_at": "2024-01-15T10:15:00Z",
                "processing_time": 5.2,
            }
        }


class SummarizationOutput(BaseModel):
    """Complete output of the Reader/Summarizer agent.

    Bundles the per-document summaries with aggregate figures and the
    cross-document observations.
    """

    # --- Per-document results and aggregates -------------------------------
    summaries: List[DocumentSummary] = Field(
        ...,
        description="Liste des résumés de documents",
    )
    total_documents: int = Field(
        ...,
        ge=0,
        description="Nombre total de documents traités",
    )
    total_processing_time: float = Field(
        ...,
        ge=0,
        description="Temps total de traitement",
    )
    # Mean credibility, constrained to [0, 1] when provided.
    average_credibility: Optional[float] = Field(
        default=None,
        ge=0,
        le=1,
        description="Score de crédibilité moyen",
    )

    # --- Cross-document analysis -------------------------------------------
    common_themes: List[str] = Field(
        default_factory=list,
        description="Thèmes récurrents identifiés",
    )
    consensus_points: List[str] = Field(
        default_factory=list,
        description="Points de consensus entre les sources",
    )
    conflicting_views: List[str] = Field(
        default_factory=list,
        description="Points de vue conflictuels",
    )

    # Timezone-aware UTC default, so generated timestamps can be compared
    # with aware values such as the "Z"-suffixed one in the example below
    # (comparing naive and aware datetimes raises ``TypeError``).
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        description="Horodatage de l'analyse",
    )

    class Config:
        # Example payload exposed through ``json_schema_extra``.
        json_schema_extra = {
            "example": {
                "summaries": [],
                "total_documents": 5,
                "total_processing_time": 25.6,
                "average_credibility": 0.75,
                "common_themes": ["automatisation", "formation", "adaptation"],
                "timestamp": "2024-01-15T10:30:00Z",
            }
        }