maifeng
/

boilerplate_detection

Text Classification

boilerplate-detection

analyst-reports

Model card Files Files and versions

maifeng committed on Sep 5, 2025

Commit

ca44142

·

verified ·

1 Parent(s): cd125b5

Update README.md

Files changed (1) hide show

README.md +6 -5

README.md CHANGED Viewed

@@ -3,15 +3,12 @@ license: apache-2.0
 language: en
 tags:
 - text-classification
 - financial-text
 - boilerplate-detection
 - analyst-reports
 pipeline_tag: text-classification
-widget:
-- text: "The securities and related financial instruments described herein may not be eligible for sale in all jurisdictions or to certain categories of investors."
-- text: "Our revenue increased by 15% compared to last quarter due to strong demand in emerging markets."
-- text: "This report contains forward-looking statements that involve risks and uncertainties."
-- text: "We launched three innovative products this quarter that exceeded our sales projections by 40%."
 ---
 # Boilerplate Detection for Financial Text
@@ -45,6 +42,9 @@ config = BoilerplateConfig.from_pretrained('maifeng/boilerplate_detection')
 model = BoilerplateDetector.from_pretrained('maifeng/boilerplate_detection')
 tokenizer = AutoTokenizer.from_pretrained('maifeng/boilerplate_detection')
 model.eval()
 # Classify texts
@@ -61,6 +61,7 @@ threshold = 0.5
 results = []
 for text in texts:
     inputs = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
     with torch.no_grad():
         outputs = model(**inputs)

 language: en
 tags:
 - text-classification
+- finance
+- accounting
 - financial-text
 - boilerplate-detection
 - analyst-reports
 pipeline_tag: text-classification
 ---
 # Boilerplate Detection for Financial Text
 model = BoilerplateDetector.from_pretrained('maifeng/boilerplate_detection')
 tokenizer = AutoTokenizer.from_pretrained('maifeng/boilerplate_detection')
+# Move model to GPU if available
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+model = model.to(device)
 model.eval()
 # Classify texts
 results = []
 for text in texts:
     inputs = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
+    inputs = {k: v.to(device) for k, v in inputs.items()}  # Move inputs to device
     with torch.no_grad():
         outputs = model(**inputs)