"""Gradio app for Maritime Intelligence Classifier + Entity Extraction."""

import gradio as gr
from setfit import SetFitModel
from transformers import pipeline
from pathlib import Path
import os

# ============================================================
# MODEL PATHS
# ============================================================

# Classification model (SetFit)
CLASSIFIER_PATH = os.getenv("CLASSIFIER_PATH", "gamaly/maritime-intelligence-classifier")
LOCAL_CLASSIFIER_PATH = "./maritime_classifier"

# NER model (BERT) - UPDATE THIS WITH YOUR HF REPO
NER_PATH = os.getenv("NER_PATH", "gamaly/bert-vessel-ner")  # <- Change to your repo!
LOCAL_NER_PATH = "./models/bert-vessel-ner"

# Label strings shown in the UI; shared by predict_text, analyze_text and the
# initial value of the Label component so they cannot drift apart.
_YES_LABEL = "YES (Actionable)"
_NO_LABEL = "NO (Not Actionable)"


# ============================================================
# LOAD MODELS
# ============================================================

def _resolve_model_source(configured_path, local_fallback):
    """Decide where a model should be loaded from.

    Args:
        configured_path: Path or Hub repo id taken from the environment
            (or its default).
        local_fallback: Hard-coded local directory used when the configured
            value is neither an existing path nor a repo id.

    Returns:
        A ``(origin, path)`` tuple where ``origin`` is ``"local"`` or
        ``"HuggingFace"`` (used only for the log message) and ``path`` is the
        value to hand to the loader.
    """
    # An explicitly configured path that exists on disk always wins; the
    # fallback directory must not shadow it.
    if Path(configured_path).exists():
        return "local", configured_path
    # A value without "/" cannot be a "user/repo" id, so prefer the bundled
    # local copy when there is one.
    if "/" not in configured_path and Path(local_fallback).exists():
        return "local", local_fallback
    return "HuggingFace", configured_path


def _load_model(display_name, loader, configured_path, local_fallback):
    """Resolve the model location, log it, and return ``loader(path)``."""
    origin, path = _resolve_model_source(configured_path, local_fallback)
    print(f"Loading {display_name} from {origin}: {path}")
    return loader(path)


def _build_ner_pipeline(model_path):
    """Create the token-classification pipeline with grouped entities."""
    return pipeline("ner", model=model_path, aggregation_strategy="simple")


print("=" * 60)
print("Loading models...")
print("=" * 60)

# Load Classification Model.
# Failures are logged rather than raised so the UI can still start and report
# the problem to the user.
classifier = None
try:
    classifier = _load_model(
        "classifier",
        SetFitModel.from_pretrained,
        CLASSIFIER_PATH,
        LOCAL_CLASSIFIER_PATH,
    )
    print("✓ Classifier loaded")
except Exception as e:
    print(f"❌ Classifier failed to load: {e}")

# Load NER Model (same best-effort policy as the classifier).
ner_model = None
try:
    ner_model = _load_model("NER", _build_ner_pipeline, NER_PATH, LOCAL_NER_PATH)
    print("✓ NER model loaded")
except Exception as e:
    print(f"❌ NER model failed to load: {e}")

print("=" * 60)
if classifier is not None and ner_model is not None:
    print("✅ All models loaded successfully!")
else:
    print("⚠️ Some models failed to load. Check logs above.")
print("=" * 60)


# ============================================================
# HELPER FUNCTIONS
# ============================================================

def truncate_text(text, max_tokens=256):
    """Truncate text to approximately ``max_tokens``.

    Tokens are estimated from whitespace-separated words at 0.75 words per
    token. Text within the budget (or empty/None) is returned unchanged;
    otherwise the first words are kept and ``"... [truncated]"`` is appended.
    """
    if not text:
        return text

    word_budget = int(max_tokens * 0.75)
    words = text.split()
    if len(words) <= word_budget:
        return text

    return " ".join(words[:word_budget]) + "... [truncated]"


def extract_entities(text):
    """Extract VESSEL and ORG entities from text.

    Args:
        text: Raw article text.

    Returns:
        ``(vessels, orgs)``: two lists of ``{"text": str, "score": float}``
        dicts, deduplicated by entity text. Both lists are empty when the NER
        model is unavailable, the text is blank, or extraction fails.
    """
    if ner_model is None or not text or not text.strip():
        return [], []

    try:
        # Keyed by entity text so repeated mentions collapse to one entry
        # (the last mention's score is kept).
        found = {"VESSEL": {}, "ORG": {}}

        for entity in ner_model(text):
            score = float(entity["score"])
            # Skip low confidence
            if score < 0.5:
                continue

            bucket = found.get(entity["entity_group"])
            if bucket is None:
                continue

            # Clean up word-piece tokenization artifacts, then trim.
            entity_text = (
                entity["word"].replace(" ##", "").replace("##", "").strip()
            )
            # An entity made only of artifacts would render as an empty bullet.
            if not entity_text:
                continue

            bucket[entity_text] = {"text": entity_text, "score": score}

        return list(found["VESSEL"].values()), list(found["ORG"].values())

    except Exception as e:
        # Best effort: entity extraction must never break classification output.
        print(f"NER error: {e}")
        return [], []


def predict_text(text):
    """Predict whether text is actionable.

    Args:
        text: Raw article text.

    Returns:
        ``(label, confidence, status)`` where ``label`` is a display string,
        ``confidence`` is a plain float percentage (0-100) and ``status`` is
        one of ``"actionable"``, ``"not_actionable"``, ``"neutral"`` or
        ``"error"``.
    """
    if classifier is None:
        return "Error: Classifier not loaded.", 0.0, "error"

    if not text or not text.strip():
        return "Please enter some text to classify.", 0.0, "neutral"

    try:
        # Truncate only when the rough token estimate exceeds 300.
        token_estimate = int(len(text.split()) / 0.75)
        if token_estimate > 300:
            processed_text = truncate_text(text, max_tokens=256)
        else:
            processed_text = text

        # Make prediction. The model may return a tensor/NumPy scalar, so
        # coerce to a plain int before comparing or indexing.
        class_index = int(classifier.predict([processed_text])[0])

        # Get probabilities
        try:
            probabilities = classifier.predict_proba([processed_text])[0]
            # float() keeps framework scalar types out of the Gradio outputs.
            confidence = float(probabilities[class_index]) * 100
        except AttributeError:
            # Placeholder used when the model exposes no predict_proba.
            confidence = 85.0

        if class_index == 1:
            return _YES_LABEL, confidence, "actionable"
        return _NO_LABEL, confidence, "not_actionable"

    except Exception as e:
        print(f"Classification error: {e}")
        return f"Error: {str(e)}", 0.0, "error"


def format_entities(vessels, orgs):
    """Format extracted entities as markdown.

    Args:
        vessels: List of ``{"text", "score"}`` dicts for vessels.
        orgs: List of ``{"text", "score"}`` dicts for organizations.

    Returns:
        A markdown string with one bullet per entity, or
        ``"No entities detected."`` when both lists are empty.
    """
    if not vessels and not orgs:
        return "No entities detected."

    parts = []

    if vessels:
        parts.append("### 🚢 Vessels\n")
        parts.extend(f"- **{v['text']}** ({v['score']:.0%})\n" for v in vessels)
        parts.append("\n")

    if orgs:
        parts.append("### 🏢 Organizations\n")
        parts.extend(f"- **{o['text']}** ({o['score']:.0%})\n" for o in orgs)

    return "".join(parts)


def get_explanation(status):
    """Get explanation based on prediction status.

    Unknown statuses map to an empty string.
    """
    explanations = {
        "actionable": "✓ This text contains actionable vessel-specific evidence.",
        "not_actionable": "✗ This text does not contain actionable vessel-specific evidence.",
        "error": "⚠️ An error occurred. Please check the model is properly loaded.",
        "neutral": "",
    }
    return explanations.get(status, "")


def analyze_text(text):
    """Run classification and entity extraction for the UI.

    Args:
        text: Raw article text from the input box.

    Returns:
        ``(label_dict, confidence, explanation, entities_md)`` matching the
        four output components wired up below.
    """
    # Classification
    _label, confidence, status = predict_text(text)

    # Create label dict (fractions in 0-1 for the Label component)
    if status == "actionable":
        label_dict = {
            _YES_LABEL: confidence / 100,
            _NO_LABEL: (100 - confidence) / 100,
        }
    elif status == "not_actionable":
        label_dict = {
            _YES_LABEL: (100 - confidence) / 100,
            _NO_LABEL: confidence / 100,
        }
    else:
        label_dict = {_YES_LABEL: 0.0, _NO_LABEL: 0.0}

    explanation = get_explanation(status)

    # Entity extraction (runs on the full, untruncated text)
    vessels, orgs = extract_entities(text)
    entities_md = "### 🔍 Extracted Entities\n" + format_entities(vessels, orgs)

    return label_dict, confidence, explanation, entities_md


# ============================================================
# GRADIO APP
# ============================================================

with gr.Blocks(title="Maritime Intelligence Classifier") as app:
    gr.Markdown(
        """
        # 🚢 Maritime Intelligence Classifier

        **Two-stage analysis:**
        1. **Classification** - Is this article actionable?
        2. **Entity Extraction** - What vessels and organizations are mentioned?
        """
    )

    with gr.Row():
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Article Text",
                placeholder="Paste or type the maritime news article text here...",
                lines=10,
                max_lines=20
            )
            submit_btn = gr.Button("Analyze", variant="primary", size="lg")

        with gr.Column(scale=1):
            # Classification results
            gr.Markdown("### 📊 Classification")
            prediction_output = gr.Label(
                label="Prediction",
                value={_YES_LABEL: 0.0, _NO_LABEL: 0.0}
            )
            confidence_output = gr.Number(
                label="Confidence",
                value=0.0,
                precision=1
            )
            explanation_output = gr.Markdown()

            # Entity extraction results
            gr.Markdown("---")
            entities_output = gr.Markdown(
                label="Extracted Entities",
                value="### 🔍 Extracted Entities\nNo entities detected yet."
            )

    # Example texts
    gr.Markdown("### 📝 Example Texts")
    with gr.Row():
        example_yes = gr.Examples(
            examples=[
                ["The fishing vessel Marine 707 was involved in the disappearance of fisheries observer Samuel Abayateye in Ghanaian waters. The observer's decapitated body was found weeks later."],
                ["Authorities detained the Meng Xin 15 after discovering evidence of illegal saiko transshipment. Pacific Seafood Inc. was identified as the vessel operator."],
            ],
            inputs=text_input,
            label="Actionable Examples"
        )
        example_no = gr.Examples(
            examples=[
                ["A new maritime museum opened in the port city, showcasing historical ships and ocean exploration artifacts."],
                ["Marine scientists are studying the effects of ocean acidification on coral reefs in tropical waters."],
            ],
            inputs=text_input,
            label="Non-Actionable Examples"
        )

    # Both the button click and pressing Enter in the textbox run the same
    # analysis against the same outputs.
    result_components = [
        prediction_output,
        confidence_output,
        explanation_output,
        entities_output,
    ]
    for register_event in (submit_btn.click, text_input.submit):
        register_event(
            fn=analyze_text,
            inputs=text_input,
            outputs=result_components
        )

    gr.Markdown(
        """
        ---
        ### ℹ️ About

        **Classification**: SetFit model identifies actionable maritime intelligence.

        **Entity Extraction**: BERT-NER model extracts vessel names and organizations.

        Built for The Outlaw Ocean Project.
        """
    )

if __name__ == "__main__":
    # NOTE(review): whether `launch()` accepts `theme` depends on the installed
    # Gradio version (some releases take it on `gr.Blocks` instead) - confirm
    # against the pinned version.
    app.launch(share=False, theme=gr.themes.Soft())