File size: 10,661 Bytes
a9d4f37
5bcf32b
 
a9d4f37
5bcf32b
 
 
a9d4f37
 
 
 
 
 
5bcf32b
a9d4f37
 
 
327be00
a9d4f37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5bcf32b
a9d4f37
 
 
 
 
 
5bcf32b
a9d4f37
 
 
5bcf32b
a9d4f37
5bcf32b
a9d4f37
 
 
327be00
a9d4f37
 
327be00
a9d4f37
 
 
3c76e95
a9d4f37
3c76e95
 
 
 
 
 
 
 
 
 
 
 
a9d4f37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5bcf32b
a9d4f37
 
 
5bcf32b
 
 
 
 
a9d4f37
3c76e95
 
 
a9d4f37
3c76e95
 
 
 
5bcf32b
a9d4f37
5bcf32b
a9d4f37
3c76e95
a9d4f37
3c76e95
a9d4f37
 
5bcf32b
a9d4f37
5bcf32b
 
 
 
a9d4f37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5bcf32b
 
 
 
a9d4f37
 
5bcf32b
 
 
 
 
a9d4f37
 
 
3c76e95
5bcf32b
 
 
 
a9d4f37
 
 
5bcf32b
 
 
 
 
 
 
 
 
 
 
 
a9d4f37
5bcf32b
 
a9d4f37
 
5bcf32b
 
 
 
 
 
 
 
 
 
 
 
a9d4f37
 
 
 
 
 
 
5bcf32b
 
 
 
 
 
 
a9d4f37
5bcf32b
 
a9d4f37
5bcf32b
 
 
 
 
 
 
 
a9d4f37
5bcf32b
 
a9d4f37
 
 
5bcf32b
 
a9d4f37
5bcf32b
 
 
 
 
 
 
 
 
a9d4f37
 
 
 
 
5bcf32b
 
a9d4f37
5bcf32b
a9d4f37
5bcf32b
 
 
a9d4f37
5bcf32b
a9d4f37
5bcf32b
 
 
 
 
 
 
a9d4f37
 
 
5bcf32b
a9d4f37
5bcf32b
 
 
 
a9d4f37
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
"""Gradio app for Maritime Intelligence Classifier + Entity Extraction."""
import gradio as gr
from setfit import SetFitModel
from transformers import pipeline
from pathlib import Path
import os

# ============================================================
# MODEL PATHS
# ============================================================
# Each model path may be either a HuggingFace Hub repo id ("user/repo")
# or a local directory; the loading logic below prefers the Hub when the
# configured value looks like a repo id and no local path shadows it.
# Classification model (SetFit)
CLASSIFIER_PATH = os.getenv("CLASSIFIER_PATH", "gamaly/maritime-intelligence-classifier")
LOCAL_CLASSIFIER_PATH = "./maritime_classifier"

# NER model (BERT) - UPDATE THIS WITH YOUR HF REPO
NER_PATH = os.getenv("NER_PATH", "gamaly/bert-vessel-ner")  # ← Change to your repo!
LOCAL_NER_PATH = "./models/bert-vessel-ner"

# ============================================================
# LOAD MODELS
# ============================================================
print("="*60)
print("Loading models...")
print("="*60)

# Load Classification Model.
# Left as None on failure; predict_text() checks for None and reports an
# error message instead of crashing the app.
classifier = None
try:
    # A "/" in the path with no matching local file is treated as a Hub id.
    if "/" in CLASSIFIER_PATH and not Path(CLASSIFIER_PATH).exists():
        print(f"Loading classifier from HuggingFace: {CLASSIFIER_PATH}")
        classifier = SetFitModel.from_pretrained(CLASSIFIER_PATH)
    elif Path(LOCAL_CLASSIFIER_PATH).exists():
        print(f"Loading classifier from local: {LOCAL_CLASSIFIER_PATH}")
        classifier = SetFitModel.from_pretrained(LOCAL_CLASSIFIER_PATH)
    else:
        # Fall back to the Hub even when the id heuristic above did not match.
        print(f"Loading classifier from HuggingFace: {CLASSIFIER_PATH}")
        classifier = SetFitModel.from_pretrained(CLASSIFIER_PATH)
    print(f"βœ“ Classifier loaded")
except Exception as e:
    print(f"❌ Classifier failed to load: {e}")

# Load NER Model (same path-resolution strategy as the classifier above).
# Left as None on failure; extract_entities() returns empty results then.
ner_model = None
try:
    if "/" in NER_PATH and not Path(NER_PATH).exists():
        print(f"Loading NER from HuggingFace: {NER_PATH}")
        # aggregation_strategy="simple" merges word-piece tokens into
        # whole-entity spans with an aggregated score.
        ner_model = pipeline("ner", model=NER_PATH, aggregation_strategy="simple")
    elif Path(LOCAL_NER_PATH).exists():
        print(f"Loading NER from local: {LOCAL_NER_PATH}")
        ner_model = pipeline("ner", model=LOCAL_NER_PATH, aggregation_strategy="simple")
    else:
        print(f"Loading NER from HuggingFace: {NER_PATH}")
        ner_model = pipeline("ner", model=NER_PATH, aggregation_strategy="simple")
    print(f"βœ“ NER model loaded")
except Exception as e:
    print(f"❌ NER model failed to load: {e}")

print("="*60)
if classifier and ner_model:
    print("βœ… All models loaded successfully!")
else:
    print("⚠️  Some models failed to load. Check logs above.")
print("="*60)

# ============================================================
# HELPER FUNCTIONS
# ============================================================
def truncate_text(text, max_tokens=256, *, words_per_token=0.75):
    """Truncate *text* to approximately *max_tokens* tokens.

    Uses a word-count heuristic instead of a real tokenizer: on average
    ``words_per_token`` words correspond to one token for English text,
    so the cut point is approximate.

    Args:
        text: Input string; falsy values (None, "") are returned as-is.
        max_tokens: Approximate token budget for the output.
        words_per_token: Heuristic ratio of words per token (keyword-only;
            default 0.75 preserves the original behavior).

    Returns:
        The original text if it fits the budget, otherwise the leading
        words re-joined with a "... [truncated]" marker appended.
    """
    if not text:
        return text

    max_words = int(max_tokens * words_per_token)
    words = text.split()

    if len(words) <= max_words:
        return text

    return " ".join(words[:max_words]) + "... [truncated]"

def extract_entities(text, min_score=0.5):
    """Extract VESSEL and ORG entities from *text* via the global NER pipeline.

    Args:
        text: Article text to analyze; blank/None yields empty results.
        min_score: Minimum confidence for an entity to be kept
            (default 0.5 preserves the original hard-coded threshold).

    Returns:
        Tuple ``(vessels, orgs)`` of lists of ``{"text": str, "score": float}``
        dicts, deduplicated by entity text (last occurrence's score wins).
        Both lists are empty when the model is unavailable, the text is
        blank, or the pipeline raises.
    """
    if ner_model is None:
        return [], []

    if not text or not text.strip():
        return [], []

    try:
        raw_entities = ner_model(text)
    except Exception as e:
        # Best-effort boundary: a NER failure must not break classification.
        print(f"NER error: {e}")
        return [], []

    vessels = []
    orgs = []

    for ent in raw_entities:
        # Skip low-confidence predictions.
        if ent['score'] < min_score:
            continue

        # Clean up residual WordPiece markers the aggregator may leave behind.
        entity_text = ent['word'].strip().replace(" ##", "").replace("##", "")
        record = {"text": entity_text, "score": ent['score']}

        if ent['entity_group'] == 'VESSEL':
            vessels.append(record)
        elif ent['entity_group'] == 'ORG':
            orgs.append(record)

    # Deduplicate by surface text, keeping the last-seen record.
    vessels = list({v['text']: v for v in vessels}.values())
    orgs = list({o['text']: o for o in orgs}.values())

    return vessels, orgs

def predict_text(text):
    """Classify *text* as actionable maritime intelligence.

    Returns:
        Tuple of (label string, confidence percentage, status key), where
        status is one of "actionable", "not_actionable", "neutral", "error".
    """
    if classifier is None:
        return "Error: Classifier not loaded.", 0.0, "error"

    if not text or not text.strip():
        return "Please enter some text to classify.", 0.0, "neutral"

    try:
        # Rough token estimate from the word count; shorten long articles
        # so they fit the encoder's context window.
        estimated_tokens = int(len(text.split()) / 0.75)
        processed_text = (
            truncate_text(text, max_tokens=256) if estimated_tokens > 300 else text
        )

        prediction = classifier.predict([processed_text])[0]

        try:
            confidence = classifier.predict_proba([processed_text])[0][prediction] * 100
        except AttributeError:
            # Model exposes no predict_proba; report a fixed placeholder.
            confidence = 85.0

        if prediction == 1:
            return "YES (Actionable)", confidence, "actionable"
        return "NO (Not Actionable)", confidence, "not_actionable"
    except Exception as e:
        print(f"Classification error: {e}")
        return f"Error: {str(e)}", 0.0, "error"

def format_entities(vessels, orgs):
    """Render extracted vessel/org entity lists as a markdown bullet list."""
    if not (vessels or orgs):
        return "No entities detected."

    sections = []

    if vessels:
        lines = ["### 🚒 Vessels"]
        lines.extend(f"- **{item['text']}** ({item['score']:.0%})" for item in vessels)
        sections.append("\n".join(lines) + "\n\n")

    if orgs:
        lines = ["### 🏒 Organizations"]
        lines.extend(f"- **{item['text']}** ({item['score']:.0%})" for item in orgs)
        sections.append("\n".join(lines) + "\n")

    return "".join(sections)

def get_explanation(status):
    """Map a prediction status key to a user-facing explanation string.

    "neutral" and any unknown status map to the empty string.
    """
    if status == "actionable":
        return "βœ“ This text contains actionable vessel-specific evidence."
    if status == "not_actionable":
        return "βœ— This text does not contain actionable vessel-specific evidence."
    if status == "error":
        return "⚠️ An error occurred. Please check the model is properly loaded."
    return ""

# ============================================================
# GRADIO APP
# ============================================================
with gr.Blocks(title="Maritime Intelligence Classifier") as app:
    gr.Markdown(
        """
        # 🚒 Maritime Intelligence Classifier
        
        **Two-stage analysis:**
        1. **Classification** - Is this article actionable?
        2. **Entity Extraction** - What vessels and organizations are mentioned?
        """
    )
    
    with gr.Row():
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Article Text",
                placeholder="Paste or type the maritime news article text here...",
                lines=10,
                max_lines=20
            )
            
            submit_btn = gr.Button("Analyze", variant="primary", size="lg")
        
        with gr.Column(scale=1):
            # Classification results
            gr.Markdown("### πŸ“Š Classification")
            prediction_output = gr.Label(
                label="Prediction",
                value={"YES (Actionable)": 0.0, "NO (Not Actionable)": 0.0}
            )
            
            confidence_output = gr.Number(
                label="Confidence",
                value=0.0,
                precision=1
            )
            
            explanation_output = gr.Markdown()
            
            # Entity extraction results
            gr.Markdown("---")
            entities_output = gr.Markdown(
                label="Extracted Entities",
                value="### πŸ” Extracted Entities\nNo entities detected yet."
            )
    
    # Example texts
    gr.Markdown("### πŸ“ Example Texts")
    with gr.Row():
        example_yes = gr.Examples(
            examples=[
                ["The fishing vessel Marine 707 was involved in the disappearance of fisheries observer Samuel Abayateye in Ghanaian waters. The observer's decapitated body was found weeks later."],
                ["Authorities detained the Meng Xin 15 after discovering evidence of illegal saiko transshipment. Pacific Seafood Inc. was identified as the vessel operator."],
            ],
            inputs=text_input,
            label="Actionable Examples"
        )
        
        example_no = gr.Examples(
            examples=[
                ["A new maritime museum opened in the port city, showcasing historical ships and ocean exploration artifacts."],
                ["Marine scientists are studying the effects of ocean acidification on coral reefs in tropical waters."],
            ],
            inputs=text_input,
            label="Non-Actionable Examples"
        )
    
    # Main analysis function
    def analyze_text(text):
        # Classification
        label, confidence, status = predict_text(text)
        
        # Create label dict
        if status == "actionable":
            label_dict = {"YES (Actionable)": confidence / 100, "NO (Not Actionable)": (100 - confidence) / 100}
        elif status == "not_actionable":
            label_dict = {"YES (Actionable)": (100 - confidence) / 100, "NO (Not Actionable)": confidence / 100}
        else:
            label_dict = {"YES (Actionable)": 0.0, "NO (Not Actionable)": 0.0}
        
        explanation = get_explanation(status)
        
        # Entity extraction
        vessels, orgs = extract_entities(text)
        entities_md = "### πŸ” Extracted Entities\n" + format_entities(vessels, orgs)
        
        return label_dict, confidence, explanation, entities_md
    
    submit_btn.click(
        fn=analyze_text,
        inputs=text_input,
        outputs=[prediction_output, confidence_output, explanation_output, entities_output]
    )
    
    text_input.submit(
        fn=analyze_text,
        inputs=text_input,
        outputs=[prediction_output, confidence_output, explanation_output, entities_output]
    )
    
    gr.Markdown(
        """
        ---
        ### ℹ️ About
        
        **Classification**: SetFit model identifies actionable maritime intelligence.
        
        **Entity Extraction**: BERT-NER model extracts vessel names and organizations.
        
        Built for The Outlaw Ocean Project.
        """
    )

if __name__ == "__main__":
    # NOTE: `theme` is a gr.Blocks(...) constructor argument, not a
    # Blocks.launch() parameter — passing it to launch() raises a
    # TypeError at startup, so it must not appear here.
    app.launch(share=False)