Spaces:

gamaly
/

ArticleClassifier

Sleeping

App Files Files Community

gamaly commited on Dec 24, 2025

Commit

3c76e95

verified ·

1 Parent(s): 327be00

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -6

app.py CHANGED Viewed

@@ -55,6 +55,25 @@ if model is None:
 else:
     print("\n✅ Model loaded successfully! Ready for inference.")
 def predict_text(text):
     """Predict whether text is actionable (YES) or not (NO)."""
     if model is None:
@@ -64,12 +83,39 @@ def predict_text(text):
         return "Please enter some text to classify.", 0.0, "neutral"
     try:
         # Make prediction
-        prediction = model.predict([text])[0]
-        probabilities = model.predict_proba([text])[0]
-        # Get confidence
-        confidence = probabilities[prediction] * 100
         # Convert to labels
         label = "YES (Actionable)" if prediction == 1 else "NO (Not Actionable)"
@@ -96,7 +142,8 @@ def get_explanation(status):
     return explanations.get(status, "")
 # Create Gradio interface
-with gr.Blocks(title="Maritime Intelligence Classifier", theme=gr.themes.Soft()) as app:
     gr.Markdown(
         """
         # 🚢 Maritime Intelligence Classifier
@@ -199,5 +246,7 @@ with gr.Blocks(title="Maritime Intelligence Classifier", theme=gr.themes.Soft())
     )
 if __name__ == "__main__":
-    app.launch(share=False)

 else:
     print("\n✅ Model loaded successfully! Ready for inference.")
+def truncate_text(text, max_tokens=256):
+    """
+    Truncate text to approximately max_tokens.
+    Uses a simple word-based approximation (roughly 1 token = 0.75 words).
+    """
+    if not text:
+        return text
+    # Rough approximation: 1 token ≈ 0.75 words (conservative estimate)
+    max_words = int(max_tokens * 0.75)
+    words = text.split()
+    if len(words) <= max_words:
+        return text
+    # Truncate and add ellipsis
+    truncated = " ".join(words[:max_words])
+    return truncated + "... [truncated]"
 def predict_text(text):
     """Predict whether text is actionable (YES) or not (NO)."""
     if model is None:
         return "Please enter some text to classify.", 0.0, "neutral"
     try:
+        # Note: SetFit uses the base model's max_length (256 tokens for all-MiniLM-L6-v2)
+        # The model will automatically truncate longer texts, but we can pre-truncate
+        # to ensure we're using the most relevant part (beginning of text)
+        # For longer articles, the beginning usually contains the most important info
+        # Check approximate length (rough estimate: 1 token ≈ 0.75 words)
+        word_count = len(text.split())
+        token_estimate = int(word_count / 0.75)
+        # If text is significantly longer than 256 tokens, truncate intelligently
+        # (SetFit will truncate anyway, but we can control which part)
+        if token_estimate > 300:  # Give some buffer
+            # For news articles, the beginning usually has the key info
+            # But we could also try: beginning + end, or just beginning
+            processed_text = truncate_text(text, max_tokens=256)
+            print(f"⚠️  Text truncated from ~{token_estimate} tokens to ~256 tokens")
+        else:
+            processed_text = text
         # Make prediction
+        prediction = model.predict([processed_text])[0]
+        # Get probabilities (handle version compatibility)
+        try:
+            probabilities = model.predict_proba([processed_text])[0]
+            confidence = probabilities[prediction] * 100
+        except AttributeError as e:
+            # Fallback if predict_proba fails due to version mismatch
+            # Use a simple confidence estimate based on prediction
+            print(f"Warning: predict_proba failed ({e}), using fallback confidence")
+            # For binary classification, we can estimate confidence from the decision function
+            # or just use a default high confidence
+            confidence = 85.0  # Default confidence when we can't get probabilities
         # Convert to labels
         label = "YES (Actionable)" if prediction == 1 else "NO (Not Actionable)"
     return explanations.get(status, "")
 # Create Gradio interface
+# Note: theme parameter moved to launch() in Gradio 6.0+
+with gr.Blocks(title="Maritime Intelligence Classifier") as app:
     gr.Markdown(
         """
         # 🚢 Maritime Intelligence Classifier
     )
 if __name__ == "__main__":
+    app.launch(share=False, theme=gr.themes.Soft())