Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -55,6 +55,25 @@ if model is None:
|
|
| 55 |
else:
|
| 56 |
print("\n✅ Model loaded successfully! Ready for inference.")
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
def predict_text(text):
|
| 59 |
"""Predict whether text is actionable (YES) or not (NO)."""
|
| 60 |
if model is None:
|
|
@@ -64,12 +83,39 @@ def predict_text(text):
|
|
| 64 |
return "Please enter some text to classify.", 0.0, "neutral"
|
| 65 |
|
| 66 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
# Make prediction
|
| 68 |
-
prediction = model.predict([
|
| 69 |
-
probabilities = model.predict_proba([text])[0]
|
| 70 |
|
| 71 |
-
# Get
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
# Convert to labels
|
| 75 |
label = "YES (Actionable)" if prediction == 1 else "NO (Not Actionable)"
|
|
@@ -96,7 +142,8 @@ def get_explanation(status):
|
|
| 96 |
return explanations.get(status, "")
|
| 97 |
|
| 98 |
# Create Gradio interface
|
| 99 |
-
|
|
|
|
| 100 |
gr.Markdown(
|
| 101 |
"""
|
| 102 |
# 🚢 Maritime Intelligence Classifier
|
|
@@ -199,5 +246,7 @@ with gr.Blocks(title="Maritime Intelligence Classifier", theme=gr.themes.Soft())
|
|
| 199 |
)
|
| 200 |
|
| 201 |
if __name__ == "__main__":
|
| 202 |
-
app.launch(share=False)
|
|
|
|
|
|
|
| 203 |
|
|
|
|
| 55 |
else:
|
| 56 |
print("\n✅ Model loaded successfully! Ready for inference.")
|
| 57 |
|
| 58 |
+
def truncate_text(text, max_tokens=256):
|
| 59 |
+
"""
|
| 60 |
+
Truncate text to approximately max_tokens.
|
| 61 |
+
Uses a simple word-based approximation (roughly 1 token = 0.75 words).
|
| 62 |
+
"""
|
| 63 |
+
if not text:
|
| 64 |
+
return text
|
| 65 |
+
|
| 66 |
+
# Rough approximation: 1 token ≈ 0.75 words (conservative estimate)
|
| 67 |
+
max_words = int(max_tokens * 0.75)
|
| 68 |
+
words = text.split()
|
| 69 |
+
|
| 70 |
+
if len(words) <= max_words:
|
| 71 |
+
return text
|
| 72 |
+
|
| 73 |
+
# Truncate and add ellipsis
|
| 74 |
+
truncated = " ".join(words[:max_words])
|
| 75 |
+
return truncated + "... [truncated]"
|
| 76 |
+
|
| 77 |
def predict_text(text):
|
| 78 |
"""Predict whether text is actionable (YES) or not (NO)."""
|
| 79 |
if model is None:
|
|
|
|
| 83 |
return "Please enter some text to classify.", 0.0, "neutral"
|
| 84 |
|
| 85 |
try:
|
| 86 |
+
# Note: SetFit uses the base model's max_length (256 tokens for all-MiniLM-L6-v2)
|
| 87 |
+
# The model will automatically truncate longer texts, but we can pre-truncate
|
| 88 |
+
# to ensure we're using the most relevant part (beginning of text)
|
| 89 |
+
# For longer articles, the beginning usually contains the most important info
|
| 90 |
+
|
| 91 |
+
# Check approximate length (rough estimate: 1 token ≈ 0.75 words)
|
| 92 |
+
word_count = len(text.split())
|
| 93 |
+
token_estimate = int(word_count / 0.75)
|
| 94 |
+
|
| 95 |
+
# If text is significantly longer than 256 tokens, truncate intelligently
|
| 96 |
+
# (SetFit will truncate anyway, but we can control which part)
|
| 97 |
+
if token_estimate > 300: # Give some buffer
|
| 98 |
+
# For news articles, the beginning usually has the key info
|
| 99 |
+
# But we could also try: beginning + end, or just beginning
|
| 100 |
+
processed_text = truncate_text(text, max_tokens=256)
|
| 101 |
+
print(f"⚠️ Text truncated from ~{token_estimate} tokens to ~256 tokens")
|
| 102 |
+
else:
|
| 103 |
+
processed_text = text
|
| 104 |
+
|
| 105 |
# Make prediction
|
| 106 |
+
prediction = model.predict([processed_text])[0]
|
|
|
|
| 107 |
|
| 108 |
+
# Get probabilities (handle version compatibility)
|
| 109 |
+
try:
|
| 110 |
+
probabilities = model.predict_proba([processed_text])[0]
|
| 111 |
+
confidence = probabilities[prediction] * 100
|
| 112 |
+
except AttributeError as e:
|
| 113 |
+
# Fallback if predict_proba fails due to version mismatch
|
| 114 |
+
# Use a simple confidence estimate based on prediction
|
| 115 |
+
print(f"Warning: predict_proba failed ({e}), using fallback confidence")
|
| 116 |
+
# For binary classification, we can estimate confidence from the decision function
|
| 117 |
+
# or just use a default high confidence
|
| 118 |
+
confidence = 85.0 # Default confidence when we can't get probabilities
|
| 119 |
|
| 120 |
# Convert to labels
|
| 121 |
label = "YES (Actionable)" if prediction == 1 else "NO (Not Actionable)"
|
|
|
|
| 142 |
return explanations.get(status, "")
|
| 143 |
|
| 144 |
# Create Gradio interface
|
| 145 |
+
# Note: theme parameter moved to launch() in Gradio 6.0+
|
| 146 |
+
with gr.Blocks(title="Maritime Intelligence Classifier") as app:
|
| 147 |
gr.Markdown(
|
| 148 |
"""
|
| 149 |
# 🚢 Maritime Intelligence Classifier
|
|
|
|
| 246 |
)
|
| 247 |
|
| 248 |
if __name__ == "__main__":
|
| 249 |
+
app.launch(share=False, theme=gr.themes.Soft())
|
| 250 |
+
|
| 251 |
+
|
| 252 |
|