Update README.md
Browse files
README.md
CHANGED
|
@@ -3,15 +3,12 @@ license: apache-2.0
|
|
| 3 |
language: en
|
| 4 |
tags:
|
| 5 |
- text-classification
|
|
|
|
|
|
|
| 6 |
- financial-text
|
| 7 |
- boilerplate-detection
|
| 8 |
- analyst-reports
|
| 9 |
pipeline_tag: text-classification
|
| 10 |
-
widget:
|
| 11 |
-
- text: "The securities and related financial instruments described herein may not be eligible for sale in all jurisdictions or to certain categories of investors."
|
| 12 |
-
- text: "Our revenue increased by 15% compared to last quarter due to strong demand in emerging markets."
|
| 13 |
-
- text: "This report contains forward-looking statements that involve risks and uncertainties."
|
| 14 |
-
- text: "We launched three innovative products this quarter that exceeded our sales projections by 40%."
|
| 15 |
---
|
| 16 |
|
| 17 |
# Boilerplate Detection for Financial Text
|
|
@@ -45,6 +42,9 @@ config = BoilerplateConfig.from_pretrained('maifeng/boilerplate_detection')
|
|
| 45 |
model = BoilerplateDetector.from_pretrained('maifeng/boilerplate_detection')
|
| 46 |
tokenizer = AutoTokenizer.from_pretrained('maifeng/boilerplate_detection')
|
| 47 |
|
|
|
|
|
|
|
|
|
|
| 48 |
model.eval()
|
| 49 |
|
| 50 |
# Classify texts
|
|
@@ -61,6 +61,7 @@ threshold = 0.5
|
|
| 61 |
results = []
|
| 62 |
for text in texts:
|
| 63 |
inputs = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
|
|
|
|
| 64 |
|
| 65 |
with torch.no_grad():
|
| 66 |
outputs = model(**inputs)
|
|
|
|
| 3 |
language: en
|
| 4 |
tags:
|
| 5 |
- text-classification
|
| 6 |
+
- finance
|
| 7 |
+
- accounting
|
| 8 |
- financial-text
|
| 9 |
- boilerplate-detection
|
| 10 |
- analyst-reports
|
| 11 |
pipeline_tag: text-classification
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
---
|
| 13 |
|
| 14 |
# Boilerplate Detection for Financial Text
|
|
|
|
| 42 |
model = BoilerplateDetector.from_pretrained('maifeng/boilerplate_detection')
|
| 43 |
tokenizer = AutoTokenizer.from_pretrained('maifeng/boilerplate_detection')
|
| 44 |
|
| 45 |
+
# Move model to GPU if available
|
| 46 |
+
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 47 |
+
model = model.to(device)
|
| 48 |
model.eval()
|
| 49 |
|
| 50 |
# Classify texts
|
|
|
|
| 61 |
results = []
|
| 62 |
for text in texts:
|
| 63 |
inputs = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
|
| 64 |
+
inputs = {k: v.to(device) for k, v in inputs.items()} # Move inputs to device
|
| 65 |
|
| 66 |
with torch.no_grad():
|
| 67 |
outputs = model(**inputs)
|