Update app.py
Browse files
app.py
CHANGED
|
@@ -11,9 +11,12 @@ BN_TO_EN_MODEL = "csebuetnlp/banglat5_nmt_bn_en"
|
|
| 11 |
NORMALIZER_REPO = "https://github.com/csebuetnlp/normalizer.git"
|
| 12 |
|
| 13 |
# --- Helper function to install/import normalizer ---
|
| 14 |
-
# This ensures the normalizer is available.
|
| 15 |
-
# In HF Spaces, requirements.txt is the primary method.
|
| 16 |
normalizer_module = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
try:
|
| 18 |
from normalizer import normalize as normalize_fn_imported
|
| 19 |
normalizer_module = normalize_fn_imported
|
|
@@ -21,8 +24,6 @@ try:
|
|
| 21 |
except ImportError:
|
| 22 |
print(f"Normalizer library not found. Attempting to install from {NORMALIZER_REPO}...")
|
| 23 |
try:
|
| 24 |
-
# This command installs the package directly from git.
|
| 25 |
-
# The #egg=normalizer part helps pip identify the package name.
|
| 26 |
subprocess.check_call([sys.executable, "-m", "pip", "install", f"git+{NORMALIZER_REPO}#egg=normalizer"])
|
| 27 |
from normalizer import normalize as normalize_fn_imported_after_install
|
| 28 |
normalizer_module = normalize_fn_imported_after_install
|
|
@@ -30,10 +31,9 @@ except ImportError:
|
|
| 30 |
except Exception as e:
|
| 31 |
print(f"Failed to install or import normalizer: {e}")
|
| 32 |
print("Please ensure 'git+https://github.com/csebuetnlp/normalizer.git#egg=normalizer' is in your requirements.txt for Hugging Face Spaces.")
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
normalizer_module = dummy_normalize
|
| 37 |
|
| 38 |
# --- Model Loading (Globally, when the script starts) ---
|
| 39 |
sylheti_to_bn_pipe = None
|
|
@@ -63,8 +63,6 @@ try:
|
|
| 63 |
|
| 64 |
except Exception as e:
|
| 65 |
print(f"FATAL: Error loading one or more models: {e}")
|
| 66 |
-
# To prevent the app from crashing entirely if models don't load,
|
| 67 |
-
# but it will show errors during translation.
|
| 68 |
sylheti_to_bn_pipe = None
|
| 69 |
bn_to_en_model = None
|
| 70 |
bn_to_en_tokenizer = None
|
|
@@ -78,7 +76,9 @@ def translate_sylheti_to_english_gradio(sylheti_text_input):
|
|
| 78 |
return "Error: Sylheti-to-Bengali model not loaded. Check logs.", ""
|
| 79 |
if not bn_to_en_model or not bn_to_en_tokenizer:
|
| 80 |
return "Error: Bengali-to-English model not loaded. Check logs.", ""
|
| 81 |
-
|
|
|
|
|
|
|
| 82 |
return "Error: Bengali normalizer library not available. Check logs.", ""
|
| 83 |
|
| 84 |
|
|
@@ -99,18 +99,24 @@ def translate_sylheti_to_english_gradio(sylheti_text_input):
|
|
| 99 |
except Exception as e:
|
| 100 |
print(f"Error during Sylheti to Bengali translation: {e}")
|
| 101 |
bengali_text_intermediate = f"Sylheti->Bengali Error: {str(e)}"
|
| 102 |
-
return bengali_text_intermediate, english_text_final
|
| 103 |
|
| 104 |
# Step 2: Bengali → English
|
| 105 |
try:
|
| 106 |
print(f"Normalizing and translating Bengali to English: '{bengali_text_intermediate}'")
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
print(f"Normalized Bengali: '{normalized_bn_text}'")
|
| 109 |
|
| 110 |
input_ids = bn_to_en_tokenizer(
|
| 111 |
normalized_bn_text,
|
| 112 |
return_tensors="pt"
|
| 113 |
-
).input_ids.to(model_device)
|
| 114 |
|
| 115 |
generated_tokens = bn_to_en_model.generate(
|
| 116 |
input_ids,
|
|
@@ -152,11 +158,10 @@ iface = gr.Interface(
|
|
| 152 |
["আফনে ভালা আছনি?"]
|
| 153 |
],
|
| 154 |
allow_flagging="never",
|
| 155 |
-
|
|
|
|
| 156 |
)
|
| 157 |
|
| 158 |
# --- Launch the Gradio app ---
|
| 159 |
if __name__ == "__main__":
|
| 160 |
-
# When running locally, this launches the server.
|
| 161 |
-
# In Hugging Face Spaces, the `app.py` is typically run by their infrastructure.
|
| 162 |
iface.launch()
|
|
|
|
| 11 |
NORMALIZER_REPO = "https://github.com/csebuetnlp/normalizer.git"
|
| 12 |
|
| 13 |
# --- Helper function to install/import normalizer ---
|
|
|
|
|
|
|
| 14 |
normalizer_module = None
|
| 15 |
+
dummy_normalizer_flag = False # Flag to indicate if dummy is used
|
| 16 |
+
|
| 17 |
+
def dummy_normalize_func(text):
    """Placeholder assigned to ``normalizer_module`` when the import fails.

    Args:
        text: Accepted so the call signature matches a one-argument
            normalizer call; the value is never used.

    Raises:
        RuntimeError: Always; this function never returns a value.
    """
    raise RuntimeError("Normalizer library could not be loaded. Please check installation and logs.")
|
| 19 |
+
|
| 20 |
try:
|
| 21 |
from normalizer import normalize as normalize_fn_imported
|
| 22 |
normalizer_module = normalize_fn_imported
|
|
|
|
| 24 |
except ImportError:
|
| 25 |
print(f"Normalizer library not found. Attempting to install from {NORMALIZER_REPO}...")
|
| 26 |
try:
|
|
|
|
|
|
|
| 27 |
subprocess.check_call([sys.executable, "-m", "pip", "install", f"git+{NORMALIZER_REPO}#egg=normalizer"])
|
| 28 |
from normalizer import normalize as normalize_fn_imported_after_install
|
| 29 |
normalizer_module = normalize_fn_imported_after_install
|
|
|
|
| 31 |
except Exception as e:
|
| 32 |
print(f"Failed to install or import normalizer: {e}")
|
| 33 |
print("Please ensure 'git+https://github.com/csebuetnlp/normalizer.git#egg=normalizer' is in your requirements.txt for Hugging Face Spaces.")
|
| 34 |
+
normalizer_module = dummy_normalize_func # Assign the actual dummy function
|
| 35 |
+
dummy_normalizer_flag = True
|
| 36 |
+
|
|
|
|
| 37 |
|
| 38 |
# --- Model Loading (Globally, when the script starts) ---
|
| 39 |
sylheti_to_bn_pipe = None
|
|
|
|
| 63 |
|
| 64 |
except Exception as e:
|
| 65 |
print(f"FATAL: Error loading one or more models: {e}")
|
|
|
|
|
|
|
| 66 |
sylheti_to_bn_pipe = None
|
| 67 |
bn_to_en_model = None
|
| 68 |
bn_to_en_tokenizer = None
|
|
|
|
| 76 |
return "Error: Sylheti-to-Bengali model not loaded. Check logs.", ""
|
| 77 |
if not bn_to_en_model or not bn_to_en_tokenizer:
|
| 78 |
return "Error: Bengali-to-English model not loaded. Check logs.", ""
|
| 79 |
+
|
| 80 |
+
# Bail out if the normalizer fell back to the dummy function or was never set
|
| 81 |
+
if dummy_normalizer_flag or normalizer_module is None:
|
| 82 |
return "Error: Bengali normalizer library not available. Check logs.", ""
|
| 83 |
|
| 84 |
|
|
|
|
| 99 |
except Exception as e:
|
| 100 |
print(f"Error during Sylheti to Bengali translation: {e}")
|
| 101 |
bengali_text_intermediate = f"Sylheti->Bengali Error: {str(e)}"
|
| 102 |
+
return bengali_text_intermediate, english_text_final
|
| 103 |
|
| 104 |
# Step 2: Bengali → English
|
| 105 |
try:
|
| 106 |
print(f"Normalizing and translating Bengali to English: '{bengali_text_intermediate}'")
|
| 107 |
+
# Ensure normalizer_module is callable before calling
|
| 108 |
+
if callable(normalizer_module):
|
| 109 |
+
normalized_bn_text = normalizer_module(bengali_text_intermediate)
|
| 110 |
+
else:
|
| 111 |
+
# This case should ideally be caught by the check above, but as a safeguard:
|
| 112 |
+
raise RuntimeError("Normalizer function is not callable.")
|
| 113 |
+
|
| 114 |
print(f"Normalized Bengali: '{normalized_bn_text}'")
|
| 115 |
|
| 116 |
input_ids = bn_to_en_tokenizer(
|
| 117 |
normalized_bn_text,
|
| 118 |
return_tensors="pt"
|
| 119 |
+
).input_ids.to(model_device)
|
| 120 |
|
| 121 |
generated_tokens = bn_to_en_model.generate(
|
| 122 |
input_ids,
|
|
|
|
| 158 |
["আফনে ভালা আছনি?"]
|
| 159 |
],
|
| 160 |
allow_flagging="never",
|
| 161 |
+
cache_examples=False, # Explicitly disable example caching
|
| 162 |
+
theme=gr.themes.Soft()
|
| 163 |
)
|
| 164 |
|
| 165 |
# --- Launch the Gradio app ---
|
| 166 |
if __name__ == "__main__":
|
|
|
|
|
|
|
| 167 |
iface.launch()
|