Upload folder using huggingface_hub
Browse files
- mecari/analyzers/mecab.py +26 -6
mecari/analyzers/mecab.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
| 2 |
# -*- coding: utf-8 -*-
|
| 3 |
|
| 4 |
import os
|
|
|
|
| 5 |
import subprocess
|
| 6 |
import tempfile
|
| 7 |
from typing import Dict, List
|
|
@@ -49,6 +50,7 @@ class MeCabAnalyzer:
|
|
| 49 |
if jumandic_path is None:
|
| 50 |
candidates = [
|
| 51 |
"/var/lib/mecab/dic/juman-utf8",
|
|
|
|
| 52 |
"/usr/lib/x86_64-linux-gnu/mecab/dic/juman-utf8",
|
| 53 |
]
|
| 54 |
ipadic_candidates = [
|
|
@@ -62,10 +64,23 @@ class MeCabAnalyzer:
|
|
| 62 |
else:
|
| 63 |
self.jumandic_path = jumandic_path
|
| 64 |
|
| 65 |
-
#
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
def version(self) -> str:
|
| 71 |
try:
|
|
@@ -81,6 +96,13 @@ class MeCabAnalyzer:
|
|
| 81 |
temp_file = f.name
|
| 82 |
try:
|
| 83 |
fmt = "%pi\t%m\t%H\t%ps\t%pe\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
cmd = [self.mecab_bin]
|
| 85 |
# Pass dictionary only if we have a resolvable path
|
| 86 |
if isinstance(self.jumandic_path, str) and os.path.isdir(self.jumandic_path):
|
|
@@ -90,8 +112,6 @@ class MeCabAnalyzer:
|
|
| 90 |
stdout = result.stdout
|
| 91 |
finally:
|
| 92 |
try:
|
| 93 |
-
import os
|
| 94 |
-
|
| 95 |
os.unlink(temp_file)
|
| 96 |
except Exception:
|
| 97 |
pass
|
|
|
|
| 2 |
# -*- coding: utf-8 -*-
|
| 3 |
|
| 4 |
import os
|
| 5 |
+
import shutil
|
| 6 |
import subprocess
|
| 7 |
import tempfile
|
| 8 |
from typing import Dict, List
|
|
|
|
| 50 |
if jumandic_path is None:
|
| 51 |
candidates = [
|
| 52 |
"/var/lib/mecab/dic/juman-utf8",
|
| 53 |
+
"/usr/lib/mecab/dic/juman-utf8",
|
| 54 |
"/usr/lib/x86_64-linux-gnu/mecab/dic/juman-utf8",
|
| 55 |
]
|
| 56 |
ipadic_candidates = [
|
|
|
|
| 64 |
else:
|
| 65 |
self.jumandic_path = jumandic_path
|
| 66 |
|
| 67 |
+
# Resolve MeCab binary path robustly
|
| 68 |
+
if mecab_bin:
|
| 69 |
+
self.mecab_bin = mecab_bin
|
| 70 |
+
elif os.getenv("MECAB_BIN"):
|
| 71 |
+
self.mecab_bin = os.getenv("MECAB_BIN") # type: ignore[assignment]
|
| 72 |
+
else:
|
| 73 |
+
resolved = shutil.which("mecab")
|
| 74 |
+
if resolved:
|
| 75 |
+
self.mecab_bin = resolved
|
| 76 |
+
else:
|
| 77 |
+
for cand in ["/usr/bin/mecab", "/usr/local/bin/mecab", "/bin/mecab"]:
|
| 78 |
+
if os.path.exists(cand):
|
| 79 |
+
self.mecab_bin = cand
|
| 80 |
+
break
|
| 81 |
+
else:
|
| 82 |
+
# Fallback; will be validated at runtime
|
| 83 |
+
self.mecab_bin = "mecab"
|
| 84 |
|
| 85 |
def version(self) -> str:
|
| 86 |
try:
|
|
|
|
| 96 |
temp_file = f.name
|
| 97 |
try:
|
| 98 |
fmt = "%pi\t%m\t%H\t%ps\t%pe\n"
|
| 99 |
+
# Validate mecab binary is available
|
| 100 |
+
if not (os.path.isabs(self.mecab_bin) and os.path.exists(self.mecab_bin)):
|
| 101 |
+
if shutil.which(self.mecab_bin) is None:
|
| 102 |
+
raise FileNotFoundError(
|
| 103 |
+
"MeCab binary not found. Ensure packages.txt installs 'mecab' and set MECAB_BIN if needed."
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
cmd = [self.mecab_bin]
|
| 107 |
# Pass dictionary only if we have a resolvable path
|
| 108 |
if isinstance(self.jumandic_path, str) and os.path.isdir(self.jumandic_path):
|
|
|
|
| 112 |
stdout = result.stdout
|
| 113 |
finally:
|
| 114 |
try:
|
|
|
|
|
|
|
| 115 |
os.unlink(temp_file)
|
| 116 |
except Exception:
|
| 117 |
pass
|