Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -27,10 +27,12 @@ def detect_encoding(file_bytes):
|
|
| 27 |
result = chardet.detect(file_bytes)
|
| 28 |
return result.get('encoding', 'utf-8')
|
| 29 |
|
| 30 |
-
def extract_entities_from_file(
|
| 31 |
-
|
|
|
|
| 32 |
encoding = detect_encoding(file_bytes)
|
| 33 |
text = file_bytes.decode(encoding, errors='ignore')
|
| 34 |
doc = nlp(text)
|
| 35 |
results = [(ent.text, ent.label_) for ent in doc.ents]
|
| 36 |
return results
|
|
|
|
|
|
| 27 |
result = chardet.detect(file_bytes)
|
| 28 |
return result.get('encoding', 'utf-8')
|
| 29 |
|
| 30 |
+
def extract_entities_from_file(file_path):
    """Extract named entities from the file at *file_path*.

    The file is read as raw bytes, its encoding is guessed with
    ``detect_encoding``, and the decoded text is run through the
    module-level ``nlp`` pipeline.

    Args:
        file_path: Path of the file to read; passed straight to ``open``.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples, one for each
        entity in ``doc.ents``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, "rb") as f:
        file_bytes = f.read()

    # detect_encoding() can return None: chardet reports
    # {'encoding': None} when it cannot identify the data (e.g. an empty
    # file), and dict.get's default only applies to a *missing* key.
    # Decoding with None would raise TypeError, so fall back to UTF-8.
    encoding = detect_encoding(file_bytes) or 'utf-8'

    # Undecodable bytes are dropped rather than raising.
    text = file_bytes.decode(encoding, errors='ignore')
    doc = nlp(text)
    return [(ent.text, ent.label_) for ent in doc.ents]
|
| 38 |
+
|