zbller committed on
Commit
9fde849
·
verified ·
1 Parent(s): 34c8a90

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. mecari/analyzers/mecab.py +26 -6
mecari/analyzers/mecab.py CHANGED
@@ -2,6 +2,7 @@
2
  # -*- coding: utf-8 -*-
3
 
4
  import os
 
5
  import subprocess
6
  import tempfile
7
  from typing import Dict, List
@@ -49,6 +50,7 @@ class MeCabAnalyzer:
49
  if jumandic_path is None:
50
  candidates = [
51
  "/var/lib/mecab/dic/juman-utf8",
 
52
  "/usr/lib/x86_64-linux-gnu/mecab/dic/juman-utf8",
53
  ]
54
  ipadic_candidates = [
@@ -62,10 +64,23 @@ class MeCabAnalyzer:
62
  else:
63
  self.jumandic_path = jumandic_path
64
 
65
- # Allow selecting a specific mecab binary via arg or env var; default to common path
66
- self.mecab_bin = mecab_bin or os.getenv("MECAB_BIN") or (
67
- "/usr/bin/mecab" if os.path.exists("/usr/bin/mecab") else "mecab"
68
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  def version(self) -> str:
71
  try:
@@ -81,6 +96,13 @@ class MeCabAnalyzer:
81
  temp_file = f.name
82
  try:
83
  fmt = "%pi\t%m\t%H\t%ps\t%pe\n"
 
 
 
 
 
 
 
84
  cmd = [self.mecab_bin]
85
  # Pass dictionary only if we have a resolvable path
86
  if isinstance(self.jumandic_path, str) and os.path.isdir(self.jumandic_path):
@@ -90,8 +112,6 @@ class MeCabAnalyzer:
90
  stdout = result.stdout
91
  finally:
92
  try:
93
- import os
94
-
95
  os.unlink(temp_file)
96
  except Exception:
97
  pass
 
2
  # -*- coding: utf-8 -*-
3
 
4
  import os
5
+ import shutil
6
  import subprocess
7
  import tempfile
8
  from typing import Dict, List
 
50
  if jumandic_path is None:
51
  candidates = [
52
  "/var/lib/mecab/dic/juman-utf8",
53
+ "/usr/lib/mecab/dic/juman-utf8",
54
  "/usr/lib/x86_64-linux-gnu/mecab/dic/juman-utf8",
55
  ]
56
  ipadic_candidates = [
 
64
  else:
65
  self.jumandic_path = jumandic_path
66
 
67
+ # Resolve MeCab binary path robustly
68
+ if mecab_bin:
69
+ self.mecab_bin = mecab_bin
70
+ elif os.getenv("MECAB_BIN"):
71
+ self.mecab_bin = os.getenv("MECAB_BIN") # type: ignore[assignment]
72
+ else:
73
+ resolved = shutil.which("mecab")
74
+ if resolved:
75
+ self.mecab_bin = resolved
76
+ else:
77
+ for cand in ["/usr/bin/mecab", "/usr/local/bin/mecab", "/bin/mecab"]:
78
+ if os.path.exists(cand):
79
+ self.mecab_bin = cand
80
+ break
81
+ else:
82
+ # Fallback; will be validated at runtime
83
+ self.mecab_bin = "mecab"
84
 
85
  def version(self) -> str:
86
  try:
 
96
  temp_file = f.name
97
  try:
98
  fmt = "%pi\t%m\t%H\t%ps\t%pe\n"
99
+ # Validate mecab binary is available
100
+ if not (os.path.isabs(self.mecab_bin) and os.path.exists(self.mecab_bin)):
101
+ if shutil.which(self.mecab_bin) is None:
102
+ raise FileNotFoundError(
103
+ "MeCab binary not found. Ensure packages.txt installs 'mecab' and set MECAB_BIN if needed."
104
+ )
105
+
106
  cmd = [self.mecab_bin]
107
  # Pass dictionary only if we have a resolvable path
108
  if isinstance(self.jumandic_path, str) and os.path.isdir(self.jumandic_path):
 
112
  stdout = result.stdout
113
  finally:
114
  try:
 
 
115
  os.unlink(temp_file)
116
  except Exception:
117
  pass