gamaly committed on
Commit
3c76e95
·
verified ·
1 Parent(s): 327be00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -6
app.py CHANGED
@@ -55,6 +55,25 @@ if model is None:
55
  else:
56
  print("\n✅ Model loaded successfully! Ready for inference.")
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  def predict_text(text):
59
  """Predict whether text is actionable (YES) or not (NO)."""
60
  if model is None:
@@ -64,12 +83,39 @@ def predict_text(text):
64
  return "Please enter some text to classify.", 0.0, "neutral"
65
 
66
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  # Make prediction
68
- prediction = model.predict([text])[0]
69
- probabilities = model.predict_proba([text])[0]
70
 
71
- # Get confidence
72
- confidence = probabilities[prediction] * 100
 
 
 
 
 
 
 
 
 
73
 
74
  # Convert to labels
75
  label = "YES (Actionable)" if prediction == 1 else "NO (Not Actionable)"
@@ -96,7 +142,8 @@ def get_explanation(status):
96
  return explanations.get(status, "")
97
 
98
  # Create Gradio interface
99
- with gr.Blocks(title="Maritime Intelligence Classifier", theme=gr.themes.Soft()) as app:
 
100
  gr.Markdown(
101
  """
102
  # 🚢 Maritime Intelligence Classifier
@@ -199,5 +246,7 @@ with gr.Blocks(title="Maritime Intelligence Classifier", theme=gr.themes.Soft())
199
  )
200
 
201
  if __name__ == "__main__":
202
- app.launch(share=False)
 
 
203
 
 
55
  else:
56
  print("\n✅ Model loaded successfully! Ready for inference.")
57
 
58
def truncate_text(text, max_tokens=256):
    """Shorten *text* to roughly ``max_tokens`` tokens.

    The token count is approximated word-wise: one token is taken to be
    about 0.75 words, so the word budget is ``int(max_tokens * 0.75)``.
    Empty/None input and text at or under the budget come back unchanged;
    longer text is cut at a word boundary and tagged "... [truncated]".
    """
    if not text:
        return text

    # Conservative tokens->words conversion (1 token ~= 0.75 words).
    word_budget = int(max_tokens * 0.75)
    tokens = text.split()

    if len(tokens) <= word_budget:
        return text

    # Keep the leading words only and mark the cut point.
    kept = tokens[:word_budget]
    return " ".join(kept) + "... [truncated]"
76
+
77
  def predict_text(text):
78
  """Predict whether text is actionable (YES) or not (NO)."""
79
  if model is None:
 
83
  return "Please enter some text to classify.", 0.0, "neutral"
84
 
85
  try:
86
+ # Note: SetFit uses the base model's max_length (256 tokens for all-MiniLM-L6-v2)
87
+ # The model will automatically truncate longer texts, but we can pre-truncate
88
+ # to ensure we're using the most relevant part (beginning of text)
89
+ # For longer articles, the beginning usually contains the most important info
90
+
91
+ # Check approximate length (rough estimate: 1 token ≈ 0.75 words)
92
+ word_count = len(text.split())
93
+ token_estimate = int(word_count / 0.75)
94
+
95
+ # If text is significantly longer than 256 tokens, truncate intelligently
96
+ # (SetFit will truncate anyway, but we can control which part)
97
+ if token_estimate > 300: # Give some buffer
98
+ # For news articles, the beginning usually has the key info
99
+ # But we could also try: beginning + end, or just beginning
100
+ processed_text = truncate_text(text, max_tokens=256)
101
+ print(f"⚠️ Text truncated from ~{token_estimate} tokens to ~256 tokens")
102
+ else:
103
+ processed_text = text
104
+
105
  # Make prediction
106
+ prediction = model.predict([processed_text])[0]
 
107
 
108
+ # Get probabilities (handle version compatibility)
109
+ try:
110
+ probabilities = model.predict_proba([processed_text])[0]
111
+ confidence = probabilities[prediction] * 100
112
+ except AttributeError as e:
113
+ # Fallback if predict_proba fails due to version mismatch
114
+ # Use a simple confidence estimate based on prediction
115
+ print(f"Warning: predict_proba failed ({e}), using fallback confidence")
116
+ # For binary classification, we can estimate confidence from the decision function
117
+ # or just use a default high confidence
118
+ confidence = 85.0 # Default confidence when we can't get probabilities
119
 
120
  # Convert to labels
121
  label = "YES (Actionable)" if prediction == 1 else "NO (Not Actionable)"
 
142
  return explanations.get(status, "")
143
 
144
  # Create Gradio interface
145
+ # Note: theme parameter moved to launch() in Gradio 6.0+
146
+ with gr.Blocks(title="Maritime Intelligence Classifier") as app:
147
  gr.Markdown(
148
  """
149
  # 🚢 Maritime Intelligence Classifier
 
246
  )
247
 
248
if __name__ == "__main__":
    # gradio's launch() does not accept a `theme` keyword argument — passing
    # one raises TypeError at startup. Theming is configured when the Blocks
    # object is constructed, not here.
    app.launch(share=False)
252