# Spaces: Runtime error (page-status residue captured with the file; not part of the module)
"""
Schemas for structured data extraction from pharmaceutical R&D documents.
These schemas define the structure of objects stored in the knowledge base.
"""
from typing import Any, Dict, List, Optional, TypedDict, Union
# =========================================================================
# Core Entity Types
# =========================================================================
class Document(TypedDict):
    """Schema for document metadata.

    Every key is required, because a TypedDict is total by default;
    fields annotated ``Optional`` may hold ``None``.
    """

    document_id: str
    protocol_id: Optional[str]
    type: str  # "Protocol", "SAP", "CSR", "IB", etc.
    version: Optional[str]
    date: Optional[str]
    title: str
    status: Optional[str]
    related_protocols: Optional[List[str]]
    filename: str
    path: str
class Study(TypedDict):
    """Schema for study information.

    Every key is required, because a TypedDict is total by default;
    fields annotated ``Optional`` may hold ``None``.
    """

    protocol_id: str
    title: str
    phase: Optional[str]
    status: Optional[str]
    design_type: Optional[str]
    start_date: Optional[str]
    end_date: Optional[str]
    compound_id: Optional[str]
    indication: Optional[str]
    # NOTE(review): kept as text here, while StudyArm.planned_subjects is an
    # int -- confirm the difference is intended.
    planned_enrollment: Optional[str]
class Compound(TypedDict):
    """Schema for investigational product information.

    Every key is required, because a TypedDict is total by default;
    fields annotated ``Optional`` may hold ``None``.
    """

    compound_id: str
    name: str
    mechanism_of_action: Optional[str]
    drug_class: Optional[str]
    targets: Optional[List[str]]
    indications: Optional[List[str]]
class StudyObjective(TypedDict):
    """Schema for study objectives.

    Every key is required, because a TypedDict is total by default;
    fields annotated ``Optional`` may hold ``None``.
    """

    objective_id: str
    protocol_id: str
    type: str  # "Primary", "Secondary", "Exploratory"
    description: str
    endpoints: Optional[List[str]]  # IDs of endpoints measuring this objective
class Endpoint(TypedDict):
    """Schema for endpoints.

    Every key is required, because a TypedDict is total by default;
    fields annotated ``Optional`` may hold ``None``.
    """

    endpoint_id: str
    protocol_id: str
    type: str  # "Safety", "Efficacy", "PK", "PD", "Biomarker"
    name: str
    definition: str
    measurement_unit: Optional[str]
    timepoints: Optional[List[str]]
    analysis_method: Optional[str]
    objective_id: Optional[str]  # ID of the objective this endpoint supports
class PopulationCriterion(TypedDict):
    """Schema for inclusion/exclusion criteria.

    Every key is required, because a TypedDict is total by default;
    fields annotated ``Optional`` may hold ``None``.
    """

    criterion_id: str
    protocol_id: str
    criterion_type: str  # "Inclusion" or "Exclusion"
    number: Optional[int]  # E.g., Inclusion criterion #3
    text: str  # Full text of the criterion
    attribute: Optional[str]  # E.g., "Age", "BMI"
    operator: Optional[str]  # E.g., ">", "<", "="
    value: Optional[str]  # E.g., "18 years", "30 kg/m²"
class StudyArm(TypedDict):
    """Schema for study arms/cohorts.

    Every key is required, because a TypedDict is total by default;
    fields annotated ``Optional`` may hold ``None``.
    """

    arm_id: str
    protocol_id: str
    name: str
    description: str
    treatment: str  # Drug/placebo
    dose: Optional[str]
    frequency: Optional[str]
    duration: Optional[str]
    planned_subjects: Optional[int]
class Assessment(TypedDict):
    """Schema for study assessments/procedures.

    Every key is required, because a TypedDict is total by default;
    fields annotated ``Optional`` may hold ``None``.
    """

    assessment_id: str
    protocol_id: str
    name: str  # E.g., "OGTT", "ECG"
    type: str  # "Safety", "PK", "PD", etc.
    description: str
    method: Optional[str]
    timing: Optional[str]  # Simplified from complex structure
    analytes: Optional[List[str]]  # What's being measured
class Analyte(TypedDict):
    """Schema for analytes/biomarkers measured.

    Every key is required, because a TypedDict is total by default;
    fields annotated ``Optional`` may hold ``None``.
    """

    analyte_id: str
    name: str
    type: Optional[str]  # E.g., "Biomarker", "PK", "Safety"
# =========================================================================
# Graph State Types (for LangGraph)
# =========================================================================
class DocumentExtractionState(TypedDict):
    """State for document extraction workflow.

    Every key is required, because a TypedDict is total by default;
    fields annotated ``Optional`` may hold ``None``.
    """

    document_path: str
    document_text: Optional[str]
    document_metadata: Optional[Document]
    sections: Optional[Dict[str, str]]
    extracted_study: Optional[Study]
    extracted_objectives: Optional[List[StudyObjective]]
    extracted_endpoints: Optional[List[Endpoint]]
    extracted_population: Optional[Dict[str, List[PopulationCriterion]]]
    # NOTE(review): bare Dict leaves key and value types unchecked.
    extracted_design: Optional[Dict]
    extracted_assessments: Optional[List[Assessment]]
    extracted_compound: Optional[Compound]
    status: str
    error: Optional[str]
    # NOTE(review): bare Dict leaves the chunk schema unchecked.
    vector_chunks: Optional[List[Dict]]
class ProtocolCoachState(TypedDict):
    """State for protocol coach chatbot workflow.

    Every key is required, because a TypedDict is total by default;
    fields annotated ``Optional`` may hold ``None``.
    """

    query: str
    # NOTE(review): bare Dict leaves the item schema of the two lists below
    # unchecked.
    retrieved_context: Optional[List[Dict]]
    chat_history: Optional[List[Dict]]
    response: Optional[str]
    error: Optional[str]
class ContentAuthoringState(TypedDict):
    """State for content authoring workflow.

    Every key is required, because a TypedDict is total by default;
    fields annotated ``Optional`` may hold ``None``.
    """

    section_type: str
    target_protocol_id: Optional[str]
    style_guide: Optional[str]
    # NOTE(review): bare Dict leaves the item schema unchecked.
    retrieved_context: Optional[List[Dict]]
    generated_content: Optional[str]
    feedback: Optional[str]
    revised_content: Optional[str]
    error: Optional[str]
class TraceabilityState(TypedDict):
    """State for document traceability analysis workflow.

    Every key is required, because a TypedDict is total by default;
    fields annotated ``Optional`` may hold ``None``.
    """

    source_document_id: str
    target_document_id: str
    entity_type: str  # "objectives", "endpoints", "population", etc.
    # NOTE(review): bare Dict leaves the item schema of the three lists below
    # unchecked.
    source_entities: Optional[List[Dict]]
    target_entities: Optional[List[Dict]]
    matched_pairs: Optional[List[Dict]]
    analysis: Optional[str]
    error: Optional[str]