silashundhausen committed on Nov 28, 2025

Commit

51454d1

verified ·

1 Parent(s): 95eaf1f

Upload Phase 2 Hierarchical Models (93.5% F1)

Browse files

Files changed (32) hide show

README.md +47 -0
inference_wrapper.py +63 -0
models/embeddings_cache.npy +3 -0
models/router_le.pkl +3 -0
models/router_xgb.pkl +3 -0
models/specialist_AGM_Info_le.pkl +3 -0
models/specialist_AGM_Info_xgb.pkl +3 -0
models/specialist_Annual_General_Meeting_le.pkl +3 -0
models/specialist_Annual_General_Meeting_xgb.pkl +3 -0
models/specialist_Debt_Info_le.pkl +3 -0
models/specialist_Debt_Info_xgb.pkl +3 -0
models/specialist_ESG_Info_le.pkl +3 -0
models/specialist_ESG_Info_xgb.pkl +3 -0
models/specialist_Equity_Info_le.pkl +3 -0
models/specialist_Equity_Info_xgb.pkl +3 -0
models/specialist_Equity_Information_le.pkl +3 -0
models/specialist_Equity_Information_xgb.pkl +3 -0
models/specialist_Financial_Reporting_le.pkl +3 -0
models/specialist_Financial_Reporting_xgb.pkl +3 -0
models/specialist_Investment_Vehicle_le.pkl +3 -0
models/specialist_Investment_Vehicle_xgb.pkl +3 -0
models/specialist_Investor_Comm_le.pkl +3 -0
models/specialist_Investor_Comm_xgb.pkl +3 -0
models/specialist_Investor_Communication_le.pkl +3 -0
models/specialist_Investor_Communication_xgb.pkl +3 -0
models/specialist_Listing_and_Regulatory_le.pkl +3 -0
models/specialist_Listing_and_Regulatory_xgb.pkl +3 -0
models/specialist_Management_le.pkl +3 -0
models/specialist_Management_xgb.pkl +3 -0
models/specialist_MandA_and_Legal_le.pkl +3 -0
models/specialist_MandA_and_Legal_xgb.pkl +3 -0
requirements.txt +4 -0

README.md ADDED Viewed

	@@ -0,0 +1,47 @@

+---
+tags:
+- financial-filings
+- classification
+- xgboost
+- jina-embeddings-v3
+library_name: xgboost
+metrics:
+- f1: 0.935
+- accuracy: 0.95
+---
+# Financial Reports Hierarchical Classifier
+This is a production-grade Hierarchical Cascade Classifier designed to categorize European financial filings into **29 distinct classes**.
+## Architecture
+- **Level 1 (Router):** A Jina-V3 + XGBoost model routing to 8 main categories.
+- **Level 2 (Specialists):** Specialized XGBoost models for fine-grained classification.
+## Performance
+- **Global Weighted F1-Score:** 93.5%
+- **Top-2 Router Accuracy:** 97.3%
+## Usage
+```python
+from huggingface_hub import snapshot_download
+import sys
+import os
+# 1. Download Models
+model_path = snapshot_download(repo_id="FinancialReports/hierarchical-filing-classifier")
+# 2. Add path and import wrapper
+sys.path.append(model_path)
+from inference_wrapper import FinancialFilingClassifier
+# 3. Predict
+classifier = FinancialFilingClassifier(model_path)
+text = "The Board declares a dividend of 5 cents per share..."
+result = classifier.predict(text)
+print(result)
+# Expect: {'category': 'Equity Info', 'label': 'Notice of Dividend Amount', 'score': 0.98}
+```

inference_wrapper.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import numpy as np
+import joblib
+import torch
+from transformers import AutoModel
+import os
+class FinancialFilingClassifier:
+    def __init__(self, model_dir):
+        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        print(f"Loading Jina Encoder on {self.device}...")
+        self.encoder = AutoModel.from_pretrained(
+            "jinaai/jina-embeddings-v3",
+            trust_remote_code=True,
+            torch_dtype=torch.float16 if self.device == 'cuda' else torch.float32
+        ).to(self.device)
+        print("Loading XGBoost Cascade...")
+        self.router = joblib.load(os.path.join(model_dir, "router_xgb.pkl"))
+        self.router_le = joblib.load(os.path.join(model_dir, "router_le.pkl"))
+        self.specialists = {}
+        self.model_dir = model_dir
+    def _get_vector(self, text):
+        log_len = np.log1p(len(str(text)))
+        with torch.no_grad():
+            vec = self.encoder.encode([text], task="classification", max_length=8192)
+        return np.hstack([vec, [[log_len]]])
+    def _load_specialist(self, category):
+        safe_name = category.replace(" ", "_").replace("&", "and").replace("/", "_")
+        if safe_name not in self.specialists:
+            try:
+                clf = joblib.load(os.path.join(self.model_dir, f"specialist_{safe_name}_xgb.pkl"))
+                le = joblib.load(os.path.join(self.model_dir, f"specialist_{safe_name}_le.pkl"))
+                self.specialists[safe_name] = (clf, le)
+            except FileNotFoundError:
+                return None
+        return self.specialists[safe_name]
+    def predict(self, text):
+        vector = self._get_vector(text)
+        router_probs = self.router.predict_proba(vector)[0]
+        top_indices = np.argsort(router_probs)[::-1][:2]
+        candidates = []
+        for idx in top_indices:
+            category = self.router_le.classes_[idx]
+            router_conf = router_probs[idx]
+            specialist = self._load_specialist(category)
+            if specialist:
+                clf, le = specialist
+                spec_probs = clf.predict_proba(vector)[0]
+                best_idx = np.argmax(spec_probs)
+                label = le.classes_[best_idx]
+                spec_conf = spec_probs[best_idx]
+                combined_score = np.sqrt(router_conf * spec_conf)
+                candidates.append({"category": category, "label": label, "score": float(combined_score)})
+            else:
+                candidates.append({"category": category, "label": category, "score": float(router_conf)})
+        return max(candidates, key=lambda x: x['score'])

models/embeddings_cache.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4688ffc541223e3e9ec569c95eb068b6868a14cea6a137c629e178871365fd7a
+size 113340544

models/router_le.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:15a1d996fd2c0da4a615c9e597c908896674e27dad31e2ba7ab5102140e4320d
+size 632

models/router_xgb.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1d5620b5760644ba8ae76c97fc6aca687c5ce50b90b2100c1637b07e94fad427
+size 73162563

models/specialist_AGM_Info_le.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:477abc9b577e160165346f6cbdaff0f8efc6c472a00a6cfae850500d99eed90d
+size 598

models/specialist_AGM_Info_xgb.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:94130f1f5053739d54116d3c999705fc87cd4eeaa68872fa9815914a49d7b073
+size 496534

models/specialist_Annual_General_Meeting_le.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:477abc9b577e160165346f6cbdaff0f8efc6c472a00a6cfae850500d99eed90d
+size 598

models/specialist_Annual_General_Meeting_xgb.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f74cc376e804cd27a1b1f2a79dc26b51259a8da911a2b198eca5e87ab1e28454
+size 496534

models/specialist_Debt_Info_le.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6868fe3e56a10d06a704c2357ce1812f20a0710d28333081a2630ddc39b41eb3
+size 530

models/specialist_Debt_Info_xgb.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:13ce8e1806221950b6d16f3515ecf36b7282c22c61f2904e685903b9b2482b7c
+size 138075

models/specialist_ESG_Info_le.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ef2b24df4fd6c9ec6f401db704153fa435a029f0a7a4414186aadc82e264ac9
+size 535

models/specialist_ESG_Info_xgb.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:50edb2b9d105feaeb23706701b2e3ad893b1cb2dc08c6ec9fd91ec3a13a5da19
+size 129758

models/specialist_Equity_Info_le.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a5feef1ff16ac95bd52b4bfa6e34d709d5c2bf17066341fb1c9a0d1450098e7
+size 592

models/specialist_Equity_Info_xgb.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:530f21f5820d8de857f06b32622d55a6863d7878ab197b4f393f448703e53e25
+size 677055

models/specialist_Equity_Information_le.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:548ab85c86c92c57b6701ad0db7a0f801299ea1ac9e588e25a3cb92e922e7729
+size 607

models/specialist_Equity_Information_xgb.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca5f93c20020a37341ef493fc368def5c02221aa889c752cf3c117536cb02d27
+size 835053

models/specialist_Financial_Reporting_le.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d81c24a1796d222f1cfcf561af8efcde464fb2f90b5049bf7578c68bfd5f9573
+size 566

models/specialist_Financial_Reporting_xgb.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2eb465b9104ff331bbf6cb8906df68747995ec155109b548a6e66f647f8ff0c1
+size 666991

models/specialist_Investment_Vehicle_le.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d0f6618f6e2e99ecaa2e4a133479504f0bb478fe7a93466327c56f6fc349dca
+size 522

models/specialist_Investment_Vehicle_xgb.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6141f6e1eda21e0e1245c32e10f962a91fc88196b380f94b527eb28c5c351158
+size 158342

models/specialist_Investor_Comm_le.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5fae7ca10a1807a1ccb21bfa698afbee9e4535899fe83eede1b7a66f68d69a6
+size 549

models/specialist_Investor_Comm_xgb.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5ddf0256e5c5e2b0c334ec9562cdecceea17400b50b897f92ebda3887a1818cc
+size 416531

models/specialist_Investor_Communication_le.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5fae7ca10a1807a1ccb21bfa698afbee9e4535899fe83eede1b7a66f68d69a6
+size 549

models/specialist_Investor_Communication_xgb.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:285265c6f1a893ac68ccfbc5e8ab238aeb03fe88e54edd048e2d4fa64cede4fe
+size 416531

models/specialist_Listing_and_Regulatory_le.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:31366cfe84445656c65d04dac807007a8a54ddb1fed494b7c5cc451c624d8939
+size 519

models/specialist_Listing_and_Regulatory_xgb.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8162901718ab78a33438bcc335348395c80d749d39746a2d5bd1dd2783a6353f
+size 141206

models/specialist_Management_le.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ed859c1e51c69a0cf1c1fd4dc9359bd69769b8fcff1961df3a9b6ede766924bb
+size 573

models/specialist_Management_xgb.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:57c28884a3c727a6b269b5edd496a6ce98bb17dedad9d6ce36315e966e14ed33
+size 672975

models/specialist_MandA_and_Legal_le.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da4d59bc1c406c82dbc76aeec01d2b811cea97b79c1066efbb9193d5ee810a52
+size 515

models/specialist_MandA_and_Legal_xgb.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0f05a37c7cbffe8e5c998051959455654be81357afc3195fa65a8612f65b1e97
+size 160790

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+torch
+transformers
+xgboost
+scikit-learn