File size: 828 Bytes
4ff66ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32


# modules/extractor_module.py
from typing import Dict, List
from core.base_module import AIModule
import spacy

class ExtractorModule(AIModule):
    """Named-entity extraction module backed by a spaCy pipeline.

    The pipeline is loaded once at construction time and reused for every
    ``process`` call.
    """

    def __init__(self, model_name: str = "en_core_web_sm"):
        """Load the spaCy pipeline used for entity extraction.

        Args:
            model_name: Name of the spaCy pipeline passed to ``spacy.load``.
                Defaults to ``"en_core_web_sm"``, so calling the constructor
                with no arguments loads the same model as before.

        Raises:
            Whatever ``spacy.load`` raises when the pipeline cannot be
            loaded (e.g. the model package is not installed).
        """
        # NOTE(review): AIModule's constructor is not visible from this file;
        # confirm whether super().__init__() needs to be called here.
        # Keep the name so get_status() reports the model actually loaded
        # instead of a second hard-coded copy of the string.
        self.model_name = model_name
        self.nlp = spacy.load(model_name)

    async def process(self, input_data: Dict) -> Dict:
        """Extract named entities from ``input_data["text"]``.

        Args:
            input_data: Mapping read for the ``"text"`` key. A missing key or
                an explicit ``None`` value is treated as empty text.

        Returns:
            A dict with two keys: ``"entities"``, a list of
            ``{"text": ..., "label": ...}`` dicts (one per entity in
            ``doc.ents``), and ``"num_entities"``, the length of that list.
        """
        text = input_data.get("text")
        if text is None:
            # dict.get's default only covers a *missing* key; a present key
            # holding None would otherwise be handed to the pipeline as-is.
            text = ""

        # NOTE(review): the pipeline runs synchronously inside this coroutine
        # (no await), so the caller's event loop waits while it executes.
        doc = self.nlp(text)

        entities = [
            {"text": ent.text, "label": ent.label_}
            for ent in doc.ents
        ]

        return {
            "entities": entities,
            "num_entities": len(entities),
        }

    async def get_status(self) -> Dict:
        """Return a status dict naming the pipeline loaded by ``__init__``."""
        return {"status": "operational", "model": self.model_name}

    @property
    def capabilities(self) -> List[str]:
        """Capability tags advertised by this module."""
        return ["named-entity-recognition", "information-extraction"]