Spaces:

goberoi
/

gliner-test

Sleeping

goberoi committed on May 20

Commit

d1923ad

verified ·

1 Parent(s): 8774b52

Create main.py

Files changed (1) hide show

main.py ADDED Viewed

+import chardet
+import spacy
+from gliner_spacy import GLiNERComponent
+# Build the NLP pipeline once at import time: load spaCy's en_core_web_lg
+# model and append the GLiNER component as the final pipe.
+nlp = spacy.load("en_core_web_lg")
+nlp.add_pipe("gliner_spacy", config={
+    # Entity labels GLiNER is asked to detect.
+    "labels": ["PERSON", "ORGANIZATION", "LOCATION", "DISEASE"],
+    # The gliner-spacy factory names this setting "gliner_model"; spaCy
+    # validates factory config, so an unrecognised "model" key is rejected.
+    "gliner_model": "urchade/gliner_multi_pii-v1"
+}, last=True)
+def detect_encoding(file_bytes):
+    """Guess the text encoding of ``file_bytes`` using chardet.
+
+    Args:
+        file_bytes: Raw bytes to inspect.
+
+    Returns:
+        The encoding name reported by ``chardet.detect``, or ``'utf-8'`` when
+        chardet reports no encoding.
+    """
+    result = chardet.detect(file_bytes)
+    # chardet always includes the 'encoding' key and sets it to None when it
+    # cannot decide (e.g. empty input), so a .get() default never applies;
+    # fall back on any falsy value instead.
+    return result.get('encoding') or 'utf-8'
+def extract_entities_from_file(file):
+    """Read a binary file object and return the named entities found in it.
+
+    Args:
+        file: Object whose ``read()`` returns the file content as bytes.
+
+    Returns:
+        A list of ``(entity_text, entity_label)`` tuples, one for each entity
+        in ``doc.ents`` of the processed text.
+    """
+    file_bytes = file.read()
+    # Guard against a missing detection result so decode() never gets None.
+    encoding = detect_encoding(file_bytes) or 'utf-8'
+    try:
+        # errors='ignore' drops undecodable bytes instead of raising.
+        text = file_bytes.decode(encoding, errors='ignore')
+    except LookupError:
+        # The detected name is not a codec Python knows; fall back to UTF-8.
+        text = file_bytes.decode('utf-8', errors='ignore')
+    doc = nlp(text)
+    return [(ent.text, ent.label_) for ent in doc.ents]