jimnoneill committed on
Commit
178b774
·
verified ·
1 Parent(s): 9da1366

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +48 -0
  2. bayesian_model_results.json +210 -0
  3. cohort_signatures.csv +17 -0
  4. setfit_agency_language/1_Pooling/config.json +10 -0
  5. setfit_agency_language/README.md +210 -0
  6. setfit_agency_language/config.json +23 -0
  7. setfit_agency_language/config_sentence_transformers.json +14 -0
  8. setfit_agency_language/config_setfit.json +4 -0
  9. setfit_agency_language/model.safetensors +3 -0
  10. setfit_agency_language/model_head.pkl +3 -0
  11. setfit_agency_language/modules.json +20 -0
  12. setfit_agency_language/sentence_bert_config.json +4 -0
  13. setfit_agency_language/special_tokens_map.json +51 -0
  14. setfit_agency_language/tokenizer.json +0 -0
  15. setfit_agency_language/tokenizer_config.json +73 -0
  16. setfit_agency_language/vocab.txt +0 -0
  17. setfit_engagement_bait/1_Pooling/config.json +10 -0
  18. setfit_engagement_bait/README.md +204 -0
  19. setfit_engagement_bait/config.json +23 -0
  20. setfit_engagement_bait/config_sentence_transformers.json +14 -0
  21. setfit_engagement_bait/config_setfit.json +4 -0
  22. setfit_engagement_bait/model.safetensors +3 -0
  23. setfit_engagement_bait/model_head.pkl +3 -0
  24. setfit_engagement_bait/modules.json +20 -0
  25. setfit_engagement_bait/sentence_bert_config.json +4 -0
  26. setfit_engagement_bait/special_tokens_map.json +51 -0
  27. setfit_engagement_bait/tokenizer.json +0 -0
  28. setfit_engagement_bait/tokenizer_config.json +73 -0
  29. setfit_engagement_bait/vocab.txt +0 -0
  30. setfit_epistemic_manipulation/1_Pooling/config.json +10 -0
  31. setfit_epistemic_manipulation/README.md +201 -0
  32. setfit_epistemic_manipulation/config.json +23 -0
  33. setfit_epistemic_manipulation/config_sentence_transformers.json +14 -0
  34. setfit_epistemic_manipulation/config_setfit.json +4 -0
  35. setfit_epistemic_manipulation/model.safetensors +3 -0
  36. setfit_epistemic_manipulation/model_head.pkl +3 -0
  37. setfit_epistemic_manipulation/modules.json +20 -0
  38. setfit_epistemic_manipulation/sentence_bert_config.json +4 -0
  39. setfit_epistemic_manipulation/special_tokens_map.json +51 -0
  40. setfit_epistemic_manipulation/tokenizer.json +0 -0
  41. setfit_epistemic_manipulation/tokenizer_config.json +73 -0
  42. setfit_epistemic_manipulation/vocab.txt +0 -0
  43. setfit_performative_outrage/1_Pooling/config.json +10 -0
  44. setfit_performative_outrage/README.md +212 -0
  45. setfit_performative_outrage/config.json +23 -0
  46. setfit_performative_outrage/config_sentence_transformers.json +14 -0
  47. setfit_performative_outrage/config_setfit.json +4 -0
  48. setfit_performative_outrage/model.safetensors +3 -0
  49. setfit_performative_outrage/model_head.pkl +3 -0
  50. setfit_performative_outrage/modules.json +20 -0
README.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-nc-4.0
3
+ tags:
4
+ - social-media-analysis
5
+ - compulsion-detection
6
+ - political-tweets
7
+ - bayesian-classifier
8
+ - digital-phenotyping
9
+ pipeline_tag: text-classification
10
+ ---
11
+
12
+ # X-Box Compulsion Classifier
13
+
14
+ Bayesian classifier for detecting compulsive social media usage patterns
15
+ in political Twitter/X accounts.
16
+
17
+ ## Architecture
18
+
19
+ - **12 classification heads**: 6 CardiffNLP (sentiment, emotion, offensive, irony, hate, toxicity) + 6 custom SetFit (ragebait, tribal signal, performative outrage, epistemic manipulation, engagement bait, agency language)
20
+ - **Compulsion signatures**: Burstiness (Goh-Barabasi), time-of-day entropy, Hawkes self-excitation, night intensity, weekend ratio
21
+ - **Bayesian posterior**: Calibrated P(compulsive | features) with 95% credible intervals
22
+ - **Disorder baseline**: DSM-5-adjacent criteria mapping with clinical thresholds
23
+
24
+ ## Validation
25
+
26
+ - LOO cross-validation: F1=1.000, AUC=1.000 on 16-account ground truth cohort
27
+ - Ground truth: 8 known-compulsive accounts (Trump Android, Trump Full, Mike Lee, Cruz, Hawley, Blackburn, Rubio, Murphy) + 8 known-strategic accounts (Feinstein, Risch, Tester, Hyde-Smith, Shelby, Hoeven, Gillibrand, Collins)
28
+
29
+ ## Feature Importance
30
+
31
+ | Feature | Mean \|LLR\| |
32
+ |---------|---------|
33
+ | Night intensity (00-05 UTC) | 28.1 |
34
+ | Time-of-day entropy | 8.0 |
35
+ | Burstiness B parameter | 4.8 |
36
+ | Hawkes self-excitation n* | 4.6 |
37
+ | Weekend ratio | 0.05 |
38
+
39
+ ## Theoretical Framework
40
+
41
+ Inspired by Recovery Viability Theory (Kepner, White, O'Neill):
42
+ - Logit-bounded state space
43
+ - Cusp catastrophe dynamics for sudden behavioral transitions
44
+ - Critical slowing down as early warning signals
45
+
46
+ ## Citation
47
+
48
+ Research by O'Neill Lab. Not for clinical diagnosis.
bayesian_model_results.json ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_params": {
3
+ "prior": {
4
+ "compulsive": 0.5,
5
+ "strategic": 0.5
6
+ },
7
+ "class_params": {
8
+ "compulsive": {
9
+ "burstiness_B": {
10
+ "mean": 0.3774861927721671,
11
+ "std": 0.22938978696976464,
12
+ "n": 8
13
+ },
14
+ "time_entropy": {
15
+ "mean": 4.14209602761983,
16
+ "std": 0.1735837176764721,
17
+ "n": 8
18
+ },
19
+ "night_intensity": {
20
+ "mean": 0.2286008328329098,
21
+ "std": 0.10958901025874938,
22
+ "n": 8
23
+ },
24
+ "hawkes_n": {
25
+ "mean": 0.9475845074610829,
26
+ "std": 0.03725061990326155,
27
+ "n": 8
28
+ },
29
+ "weekend_ratio": {
30
+ "mean": 0.7103416736296673,
31
+ "std": 0.1981138498143343,
32
+ "n": 8
33
+ }
34
+ },
35
+ "strategic": {
36
+ "burstiness_B": {
37
+ "mean": 0.10522888954988305,
38
+ "std": 0.08097919025103344,
39
+ "n": 8
40
+ },
41
+ "time_entropy": {
42
+ "mean": 3.541328083305317,
43
+ "std": 0.15778381821874674,
44
+ "n": 8
45
+ },
46
+ "night_intensity": {
47
+ "mean": 0.037430947815364754,
48
+ "std": 0.022583595321530345,
49
+ "n": 8
50
+ },
51
+ "hawkes_n": {
52
+ "mean": 0.8163132264766522,
53
+ "std": 0.06881668997824561,
54
+ "n": 8
55
+ },
56
+ "weekend_ratio": {
57
+ "mean": 0.6636698298243918,
58
+ "std": 0.19803933864431938,
59
+ "n": 8
60
+ }
61
+ }
62
+ },
63
+ "features": [
64
+ "burstiness_B",
65
+ "time_entropy",
66
+ "night_intensity",
67
+ "hawkes_n",
68
+ "weekend_ratio"
69
+ ],
70
+ "feature_weights": {
71
+ "burstiness_B": 1.0,
72
+ "time_entropy": 1.2,
73
+ "night_intensity": 1.2,
74
+ "hawkes_n": 1.0,
75
+ "weekend_ratio": 0.3
76
+ }
77
+ },
78
+ "cv_results": {
79
+ "accuracy": 1.0,
80
+ "f1": 1.0,
81
+ "precision": 1.0,
82
+ "recall": 1.0,
83
+ "auc": 1.0,
84
+ "n_samples": 16,
85
+ "confusion_matrix": [
86
+ [
87
+ 8,
88
+ 0
89
+ ],
90
+ [
91
+ 0,
92
+ 8
93
+ ]
94
+ ]
95
+ },
96
+ "cv_details": [
97
+ {
98
+ "account": "BasedMikeLee",
99
+ "true": "compulsive",
100
+ "pred": "compulsive",
101
+ "posterior": 1.0,
102
+ "correct": true
103
+ },
104
+ {
105
+ "account": "Trump_Android",
106
+ "true": "compulsive",
107
+ "pred": "compulsive",
108
+ "posterior": 1.0,
109
+ "correct": true
110
+ },
111
+ {
112
+ "account": "Trump_Full",
113
+ "true": "compulsive",
114
+ "pred": "compulsive",
115
+ "posterior": 1.0,
116
+ "correct": true
117
+ },
118
+ {
119
+ "account": "marcorubio",
120
+ "true": "compulsive",
121
+ "pred": "compulsive",
122
+ "posterior": 1.0,
123
+ "correct": true
124
+ },
125
+ {
126
+ "account": "ChrisMurphyCT",
127
+ "true": "compulsive",
128
+ "pred": "compulsive",
129
+ "posterior": 1.0,
130
+ "correct": true
131
+ },
132
+ {
133
+ "account": "HawleyMO",
134
+ "true": "compulsive",
135
+ "pred": "compulsive",
136
+ "posterior": 0.9996,
137
+ "correct": true
138
+ },
139
+ {
140
+ "account": "SenTedCruz",
141
+ "true": "compulsive",
142
+ "pred": "compulsive",
143
+ "posterior": 1.0,
144
+ "correct": true
145
+ },
146
+ {
147
+ "account": "MarshaBlackburn",
148
+ "true": "compulsive",
149
+ "pred": "compulsive",
150
+ "posterior": 1.0,
151
+ "correct": true
152
+ },
153
+ {
154
+ "account": "SenatorCollins",
155
+ "true": "strategic",
156
+ "pred": "strategic",
157
+ "posterior": 0.0002,
158
+ "correct": true
159
+ },
160
+ {
161
+ "account": "SenGillibrand",
162
+ "true": "strategic",
163
+ "pred": "strategic",
164
+ "posterior": 0.0,
165
+ "correct": true
166
+ },
167
+ {
168
+ "account": "SenJohnHoeven",
169
+ "true": "strategic",
170
+ "pred": "strategic",
171
+ "posterior": 0.0,
172
+ "correct": true
173
+ },
174
+ {
175
+ "account": "SenShelby",
176
+ "true": "strategic",
177
+ "pred": "strategic",
178
+ "posterior": 0.0,
179
+ "correct": true
180
+ },
181
+ {
182
+ "account": "SenHydeSmith",
183
+ "true": "strategic",
184
+ "pred": "strategic",
185
+ "posterior": 0.0,
186
+ "correct": true
187
+ },
188
+ {
189
+ "account": "SenatorTester",
190
+ "true": "strategic",
191
+ "pred": "strategic",
192
+ "posterior": 0.0,
193
+ "correct": true
194
+ },
195
+ {
196
+ "account": "SenatorRisch",
197
+ "true": "strategic",
198
+ "pred": "strategic",
199
+ "posterior": 0.0,
200
+ "correct": true
201
+ },
202
+ {
203
+ "account": "SenFeinstein",
204
+ "true": "strategic",
205
+ "pred": "strategic",
206
+ "posterior": 0.0,
207
+ "correct": true
208
+ }
209
+ ]
210
+ }
cohort_signatures.csv ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ account,category,n_tweets,p_compulsive,classification,burstiness_B,time_entropy,weekend_ratio,night_intensity,hawkes_n,gap_shape,z_state
2
+ BasedMikeLee,compulsive,49415,0.9406263649632571,compulsive,0.7545326278796666,4.208015373082699,0.8377908084105669,0.40184154608924416,0.9837007722574105,power_law,2.7626957258242157
3
+ Trump_Android,compulsive,14545,0.8823061825550056,compulsive,0.5253151861781247,4.284990749399817,1.033448438141054,0.3969749054657958,0.9798360265680514,power_law,2.0144526567129484
4
+ Trump_Full,compulsive,32797,0.875071775820615,compulsive,0.6452797417842915,4.412102033911361,0.7814768927434289,0.22367899503003325,0.9769165070058401,power_law,1.9465665466849962
5
+ marcorubio,compulsive,766,0.7678070462143948,compulsive,0.2524741167614851,4.219965321149076,0.7440161104718067,0.1566579634464752,0.9219944937289691,power_law,1.1959697363233344
6
+ ChrisMurphyCT,compulsive,1283,0.7509103949248175,compulsive,0.19841235471107352,4.146720586553414,0.703444490188006,0.20187061574434917,0.933657627820327,log_normal,1.103473638827422
7
+ HawleyMO,compulsive,499,0.7228518121340829,compulsive,0.24958015504551243,3.9194049186207964,0.7014571331751949,0.14228456913827656,0.8735398679532758,power_law,0.9586519012961181
8
+ SenTedCruz,compulsive,1723,0.6980689954332613,compulsive,0.23902570871834355,3.961771111478407,0.4073355263157895,0.1323273360417876,0.9504588739607008,power_law,0.8381194156289073
9
+ MarshaBlackburn,compulsive,2050,0.6948594811223766,compulsive,0.1552696510988397,3.9837981267630704,0.47376398959149135,0.17317073170731706,0.9605718903940886,log_normal,0.8229372515459503
10
+ SenatorCollins,strategic,232,0.6322980420959895,mixed,0.09764735011166781,3.785114614422353,0.9807692307692308,0.08620689655172414,0.72,log_normal,0.5420881555550405
11
+ SenGillibrand,strategic,524,0.5746443007024875,mixed,-0.017533957525227656,3.632842523682244,0.6962025316455697,0.03625954198473282,0.8443915501338887,log_normal,0.3008254834974807
12
+ SenJohnHoeven,strategic,330,0.5741204562695057,mixed,0.05252092123115066,3.6349489923808225,0.6592713209851414,0.048484848484848485,0.7715277777777778,log_normal,0.29868268334246456
13
+ SenShelby,strategic,205,0.5718305322286668,mixed,0.1351874084196579,3.401485372907101,0.89586969168121,0.02926829268292683,0.7272727272727273,log_normal,0.28932360520501765
14
+ SenHydeSmith,strategic,553,0.5654565962665936,mixed,0.03903979866952534,3.6503894273604685,0.6369230769230769,0.018083182640144666,0.8526821457165733,log_normal,0.26333770653226224
15
+ SenatorTester,strategic,604,0.5642272020191504,mixed,0.12048563517633806,3.4525234462090433,0.551470588235294,0.039735099337748346,0.8724079559881507,log_normal,0.2583360088612716
16
+ SenatorRisch,strategic,462,0.5458959564095442,mixed,0.17832433600182634,3.4591774752403817,0.513530522341095,0.015151515151515152,0.834529791816224,log_normal,0.18410205887052772
17
+ SenFeinstein,strategic,914,0.5423587988964276,mixed,0.23615962431412627,3.3141428142401184,0.3753216760145167,0.0262582056892779,0.9076938631078758,log_normal,0.1698422996151933
setfit_agency_language/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
setfit_agency_language/README.md ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - setfit
4
+ - sentence-transformers
5
+ - text-classification
6
+ - generated_from_setfit_trainer
7
+ widget:
8
+ - text: 'RT @BasedMikeLee: If she didn’t know of Biden’s condition all along, then—by
9
+ repeatedly and emphatically telling us Biden was fit for offic…'
10
+ - text: 'One more win and the @braves take the #WorldSeriesweve got this, yall! #BattleATL
11
+ https://t.co/iEw8Nx7LIP'
12
+ - text: While SCOTUS has allowed challenges to SB to proceed, it's outrageous that
13
+ the Court has again decided not to block Texas' unconstitutional abortion ban.
14
+ More Texans are harmed every day this law is allowed to stand. The Senate must
15
+ pass the Women's Health Protection Act.
16
+ - text: The @TulsiGabbard vote needs to be public Her nomination isn’t classified The
17
+ vote on her nomination shouldn’t be treated as if it were https://t.co/Rd4RfFxWmF
18
+ - text: 'The Executive Branch should be under the direction of the president That’s
19
+ how the Constitution was designed The Federal Reserve is one of many examples
20
+ of how we’ve deviated from the Constitution in that regard Yet another reason
21
+ why we should #EndTheFed https://t.co/qfW7tFdhe8'
22
+ metrics:
23
+ - accuracy
24
+ pipeline_tag: text-classification
25
+ library_name: setfit
26
+ inference: true
27
+ base_model: sentence-transformers/all-mpnet-base-v2
28
+ model-index:
29
+ - name: SetFit with sentence-transformers/all-mpnet-base-v2
30
+ results:
31
+ - task:
32
+ type: text-classification
33
+ name: Text Classification
34
+ dataset:
35
+ name: Unknown
36
+ type: unknown
37
+ split: test
38
+ metrics:
39
+ - type: accuracy
40
+ value: 0.8375
41
+ name: Accuracy
42
+ ---
43
+
44
+ # SetFit with sentence-transformers/all-mpnet-base-v2
45
+
46
+ This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
47
+
48
+ The model has been trained using an efficient few-shot learning technique that involves:
49
+
50
+ 1. Fine-tuning a [Sentence Transformer](https://www.sbert.net) with contrastive learning.
51
+ 2. Training a classification head with features from the fine-tuned Sentence Transformer.
52
+
53
+ ## Model Details
54
+
55
+ ### Model Description
56
+ - **Model Type:** SetFit
57
+ - **Sentence Transformer body:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2)
58
+ - **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
59
+ - **Maximum Sequence Length:** 384 tokens
60
+ - **Number of Classes:** 2 classes
61
+ <!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
62
+ <!-- - **Language:** Unknown -->
63
+ <!-- - **License:** Unknown -->
64
+
65
+ ### Model Sources
66
+
67
+ - **Repository:** [SetFit on GitHub](https://github.com/huggingface/setfit)
68
+ - **Paper:** [Efficient Few-Shot Learning Without Prompts](https://arxiv.org/abs/2209.11055)
69
+ - **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)
70
+
71
+ ### Model Labels
72
+ | Label | Examples |
73
+ |:------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
74
+ | 0 | <ul><li>"President Joe Biden just signed the #AmericanRescuePlan. Help is on the way. I'll say it again: Thank God for Georgia."</li><li>'@1stacyphillips Odd that he didn’t tell us himself'</li><li>'Stablecoins pose risks to consumers and to our economy. Theyre propping up one of the shadiest parts of the crypto world, DeFi, where consumers are least protected from getting scammed. Our regulators need to get serious about clamping down before it is too late. https://t.co/hMOT1HIQgn'</li></ul> |
75
+ | 1 | <ul><li>'Hassan Sheikh Mohamud, the president of Somalia, is *not* “our president” It’s troubling that any member of Congress—regardless of her party affiliation or national origin—would suggest otherwise Who’s with me? https://t.co/7NBXuQeu6R https://t.co/AgfvVWL4PD'</li><li>'RT @BasedMikeLee: This familiar headline—“Congress announces a deal to avoid a shutdown”—is premature &amp; misleading. “Congress” hasn’t eve…'</li><li>'El Salvador is safe because @nayibbukele locked up the people who had long made it unsafe'</li></ul> |
76
+
77
+ ## Evaluation
78
+
79
+ ### Metrics
80
+ | Label | Accuracy |
81
+ |:--------|:---------|
82
+ | **all** | 0.8375 |
83
+
84
+ ## Uses
85
+
86
+ ### Direct Use for Inference
87
+
88
+ First install the SetFit library:
89
+
90
+ ```bash
91
+ pip install setfit
92
+ ```
93
+
94
+ Then you can load this model and run inference.
95
+
96
+ ```python
97
+ from setfit import SetFitModel
98
+
99
+ # Download from the 🤗 Hub
100
+ model = SetFitModel.from_pretrained("setfit_model_id")
101
+ # Run inference
102
+ preds = model("One more win and the @braves take the #WorldSeriesweve got this, yall! #BattleATL https://t.co/iEw8Nx7LIP")
103
+ ```
104
+
105
+ <!--
106
+ ### Downstream Use
107
+
108
+ *List how someone could finetune this model on their own dataset.*
109
+ -->
110
+
111
+ <!--
112
+ ### Out-of-Scope Use
113
+
114
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
115
+ -->
116
+
117
+ <!--
118
+ ## Bias, Risks and Limitations
119
+
120
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
121
+ -->
122
+
123
+ <!--
124
+ ### Recommendations
125
+
126
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
127
+ -->
128
+
129
+ ## Training Details
130
+
131
+ ### Training Set Metrics
132
+ | Training set | Min | Median | Max |
133
+ |:-------------|:----|:--------|:----|
134
+ | Word count | 2 | 25.8156 | 57 |
135
+
136
+ | Label | Training Sample Count |
137
+ |:------|:----------------------|
138
+ | 0 | 158 |
139
+ | 1 | 162 |
140
+
141
+ ### Training Hyperparameters
142
+ - batch_size: (16, 16)
143
+ - num_epochs: (1, 1)
144
+ - max_steps: -1
145
+ - sampling_strategy: oversampling
146
+ - num_iterations: 5
147
+ - body_learning_rate: (2e-05, 1e-05)
148
+ - head_learning_rate: 0.01
149
+ - loss: CosineSimilarityLoss
150
+ - distance_metric: cosine_distance
151
+ - margin: 0.25
152
+ - end_to_end: False
153
+ - use_amp: False
154
+ - warmup_proportion: 0.1
155
+ - l2_weight: 0.01
156
+ - seed: 42
157
+ - eval_max_steps: -1
158
+ - load_best_model_at_end: False
159
+
160
+ ### Training Results
161
+ | Epoch | Step | Training Loss | Validation Loss |
162
+ |:-----:|:----:|:-------------:|:---------------:|
163
+ | 0.005 | 1 | 0.5111 | - |
164
+ | 0.25 | 50 | 0.2576 | - |
165
+ | 0.5 | 100 | 0.1736 | - |
166
+ | 0.75 | 150 | 0.0497 | - |
167
+ | 1.0 | 200 | 0.0204 | - |
168
+
169
+ ### Framework Versions
170
+ - Python: 3.12.3
171
+ - SetFit: 1.1.3
172
+ - Sentence Transformers: 5.3.0
173
+ - Transformers: 4.57.6
174
+ - PyTorch: 2.10.0+cpu
175
+ - Datasets: 4.8.3
176
+ - Tokenizers: 0.22.2
177
+
178
+ ## Citation
179
+
180
+ ### BibTeX
181
+ ```bibtex
182
+ @article{https://doi.org/10.48550/arxiv.2209.11055,
183
+ doi = {10.48550/ARXIV.2209.11055},
184
+ url = {https://arxiv.org/abs/2209.11055},
185
+ author = {Tunstall, Lewis and Reimers, Nils and Jo, Unso Eun Seo and Bates, Luke and Korat, Daniel and Wasserblat, Moshe and Pereg, Oren},
186
+ keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences},
187
+ title = {Efficient Few-Shot Learning Without Prompts},
188
+ publisher = {arXiv},
189
+ year = {2022},
190
+ copyright = {Creative Commons Attribution 4.0 International}
191
+ }
192
+ ```
193
+
194
+ <!--
195
+ ## Glossary
196
+
197
+ *Clearly define terms in order to be accessible across audiences.*
198
+ -->
199
+
200
+ <!--
201
+ ## Model Card Authors
202
+
203
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
204
+ -->
205
+
206
+ <!--
207
+ ## Model Card Contact
208
+
209
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
210
+ -->
setfit_agency_language/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MPNetModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "dtype": "float32",
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 514,
16
+ "model_type": "mpnet",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 1,
20
+ "relative_attention_num_buckets": 32,
21
+ "transformers_version": "4.57.6",
22
+ "vocab_size": 30527
23
+ }
setfit_agency_language/config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "5.3.0",
4
+ "transformers": "4.57.6",
5
+ "pytorch": "2.10.0+cpu"
6
+ },
7
+ "model_type": "SentenceTransformer",
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
setfit_agency_language/config_setfit.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "labels": null,
3
+ "normalize_embeddings": false
4
+ }
setfit_agency_language/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ffba55d5078f6853e0f2ba7861b3f1867289354b9248c358f835877407a44ff
3
+ size 437967672
setfit_agency_language/model_head.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5818380ccbf4631b795e15086104391abc774bb1e42d81c250071ec707b50329
3
+ size 7007
setfit_agency_language/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
setfit_agency_language/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 384,
3
+ "do_lower_case": false
4
+ }
setfit_agency_language/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "[UNK]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
setfit_agency_language/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
setfit_agency_language/tokenizer_config.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "104": {
36
+ "content": "[UNK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "30526": {
44
+ "content": "<mask>",
45
+ "lstrip": true,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "bos_token": "<s>",
53
+ "clean_up_tokenization_spaces": false,
54
+ "cls_token": "<s>",
55
+ "do_lower_case": true,
56
+ "eos_token": "</s>",
57
+ "extra_special_tokens": {},
58
+ "mask_token": "<mask>",
59
+ "max_length": 128,
60
+ "model_max_length": 384,
61
+ "pad_to_multiple_of": null,
62
+ "pad_token": "<pad>",
63
+ "pad_token_type_id": 0,
64
+ "padding_side": "right",
65
+ "sep_token": "</s>",
66
+ "stride": 0,
67
+ "strip_accents": null,
68
+ "tokenize_chinese_chars": true,
69
+ "tokenizer_class": "MPNetTokenizer",
70
+ "truncation_side": "right",
71
+ "truncation_strategy": "longest_first",
72
+ "unk_token": "[UNK]"
73
+ }
setfit_agency_language/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
setfit_engagement_bait/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
setfit_engagement_bait/README.md ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - setfit
4
+ - sentence-transformers
5
+ - text-classification
6
+ - generated_from_setfit_trainer
7
+ widget:
8
+ - text: 'Congratulations to Floridians @eddyalvarez90, Nick Martinez and Tristian
9
+ Casas for winning silver in baseball at the #Tokyo2020 Olympics! Keep up this
10
+ great work. https://t.co/QUQQYjcNSk'
11
+ - text: Saudi Arabia should fight their own wars, which they won't, or pay us an absolute
12
+ fortune to protect them and their great wealth-$ trillion!
13
+ - text: Mmmm. So much logic happening here. https://t.co/vuVETxXUU7
14
+ - text: Who’s not ready for “Mamala the Country”? https://t.co/ROT1RXTOHH
15
+ - text: 'Big Balls for the Medal of Freedom Who agrees? #FederalizeDC'
16
+ metrics:
17
+ - accuracy
18
+ pipeline_tag: text-classification
19
+ library_name: setfit
20
+ inference: true
21
+ base_model: sentence-transformers/all-mpnet-base-v2
22
+ model-index:
23
+ - name: SetFit with sentence-transformers/all-mpnet-base-v2
24
+ results:
25
+ - task:
26
+ type: text-classification
27
+ name: Text Classification
28
+ dataset:
29
+ name: Unknown
30
+ type: unknown
31
+ split: test
32
+ metrics:
33
+ - type: accuracy
34
+ value: 0.8
35
+ name: Accuracy
36
+ ---
37
+
38
+ # SetFit with sentence-transformers/all-mpnet-base-v2
39
+
40
+ This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
41
+
42
+ The model has been trained using an efficient few-shot learning technique that involves:
43
+
44
+ 1. Fine-tuning a [Sentence Transformer](https://www.sbert.net) with contrastive learning.
45
+ 2. Training a classification head with features from the fine-tuned Sentence Transformer.
46
+
47
+ ## Model Details
48
+
49
+ ### Model Description
50
+ - **Model Type:** SetFit
51
+ - **Sentence Transformer body:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2)
52
+ - **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
53
+ - **Maximum Sequence Length:** 384 tokens
54
+ - **Number of Classes:** 2 classes
55
+ <!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
56
+ <!-- - **Language:** Unknown -->
57
+ <!-- - **License:** Unknown -->
58
+
59
+ ### Model Sources
60
+
61
+ - **Repository:** [SetFit on GitHub](https://github.com/huggingface/setfit)
62
+ - **Paper:** [Efficient Few-Shot Learning Without Prompts](https://arxiv.org/abs/2209.11055)
63
+ - **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)
64
+
65
+ ### Model Labels
66
+ | Label | Examples |
67
+ |:------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
68
+ | 0 | <ul><li>'Those closest to the disease should be the closest to the vaccine. Thats the moment were in." - @macurry01 @MassLeague https://t.co/wDB80seVEC'</li><li>'The Pioneers for Womans Suffrage statue can be found in the US Capitol Rotunda. It celebrates those who led the way for womens right to vote #realhistorychannel #womenshistorymonth @ United States Capitol https://t.co/7D3Xr3uAID'</li><li>'Last week I was proud to introduce my Improving Care for Veterans Actwhich would raise the standard of care in Veterans Homes to better protect our courageous veterans, prevent outbreaks and improve health outcomes for our heroes in Georgia and nationwide.'</li></ul> |
69
+ | 1 | <ul><li>'So … why didn’t they beef up Trump’s security detail after learning of an Iranian plot to assassinate him? It seems likely that this would’ve thwarted what happened on Saturday. https://t.co/qn4NkryBkl'</li><li>'Who else would like once again to be able to buy “real” light bulbs, showerheads, and household appliances—i.e., these products as they existed before federal bureaucrats ruined them by forcing manufacturers to comply with rigid energy- and water-efficiency standards?'</li><li>'Who else voted for this? https://t.co/TTKaTaahEI'</li></ul> |
70
+
71
+ ## Evaluation
72
+
73
+ ### Metrics
74
+ | Label | Accuracy |
75
+ |:--------|:---------|
76
+ | **all** | 0.8 |
77
+
78
+ ## Uses
79
+
80
+ ### Direct Use for Inference
81
+
82
+ First install the SetFit library:
83
+
84
+ ```bash
85
+ pip install setfit
86
+ ```
87
+
88
+ Then you can load this model and run inference.
89
+
90
+ ```python
91
+ from setfit import SetFitModel
92
+
93
+ # Download from the 🤗 Hub
94
+ model = SetFitModel.from_pretrained("setfit_model_id")
95
+ # Run inference
96
+ preds = model("Mmmm. So much logic happening here. https://t.co/vuVETxXUU7")
97
+ ```
98
+
99
+ <!--
100
+ ### Downstream Use
101
+
102
+ *List how someone could finetune this model on their own dataset.*
103
+ -->
104
+
105
+ <!--
106
+ ### Out-of-Scope Use
107
+
108
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
109
+ -->
110
+
111
+ <!--
112
+ ## Bias, Risks and Limitations
113
+
114
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
115
+ -->
116
+
117
+ <!--
118
+ ### Recommendations
119
+
120
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
121
+ -->
122
+
123
+ ## Training Details
124
+
125
+ ### Training Set Metrics
126
+ | Training set | Min | Mean | Max |
127
+ |:-------------|:----|:--------|:----|
128
+ | Word count | 1 | 22.1375 | 60 |
129
+
130
+ | Label | Training Sample Count |
131
+ |:------|:----------------------|
132
+ | 0 | 158 |
133
+ | 1 | 162 |
134
+
135
+ ### Training Hyperparameters
136
+ - batch_size: (16, 16)
137
+ - num_epochs: (1, 1)
138
+ - max_steps: -1
139
+ - sampling_strategy: oversampling
140
+ - num_iterations: 5
141
+ - body_learning_rate: (2e-05, 1e-05)
142
+ - head_learning_rate: 0.01
143
+ - loss: CosineSimilarityLoss
144
+ - distance_metric: cosine_distance
145
+ - margin: 0.25
146
+ - end_to_end: False
147
+ - use_amp: False
148
+ - warmup_proportion: 0.1
149
+ - l2_weight: 0.01
150
+ - seed: 42
151
+ - eval_max_steps: -1
152
+ - load_best_model_at_end: False
153
+
154
+ ### Training Results
155
+ | Epoch | Step | Training Loss | Validation Loss |
156
+ |:-----:|:----:|:-------------:|:---------------:|
157
+ | 0.005 | 1 | 0.4707 | - |
158
+ | 0.25 | 50 | 0.2633 | - |
159
+ | 0.5 | 100 | 0.2054 | - |
160
+ | 0.75 | 150 | 0.0767 | - |
161
+ | 1.0 | 200 | 0.0234 | - |
162
+
163
+ ### Framework Versions
164
+ - Python: 3.12.3
165
+ - SetFit: 1.1.3
166
+ - Sentence Transformers: 5.3.0
167
+ - Transformers: 4.57.6
168
+ - PyTorch: 2.10.0+cpu
169
+ - Datasets: 4.8.3
170
+ - Tokenizers: 0.22.2
171
+
172
+ ## Citation
173
+
174
+ ### BibTeX
175
+ ```bibtex
176
+ @article{https://doi.org/10.48550/arxiv.2209.11055,
177
+ doi = {10.48550/ARXIV.2209.11055},
178
+ url = {https://arxiv.org/abs/2209.11055},
179
+ author = {Tunstall, Lewis and Reimers, Nils and Jo, Unso Eun Seo and Bates, Luke and Korat, Daniel and Wasserblat, Moshe and Pereg, Oren},
180
+ keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences},
181
+ title = {Efficient Few-Shot Learning Without Prompts},
182
+ publisher = {arXiv},
183
+ year = {2022},
184
+ copyright = {Creative Commons Attribution 4.0 International}
185
+ }
186
+ ```
187
+
188
+ <!--
189
+ ## Glossary
190
+
191
+ *Clearly define terms in order to be accessible across audiences.*
192
+ -->
193
+
194
+ <!--
195
+ ## Model Card Authors
196
+
197
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
198
+ -->
199
+
200
+ <!--
201
+ ## Model Card Contact
202
+
203
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
204
+ -->
setfit_engagement_bait/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MPNetModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "dtype": "float32",
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 514,
16
+ "model_type": "mpnet",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 1,
20
+ "relative_attention_num_buckets": 32,
21
+ "transformers_version": "4.57.6",
22
+ "vocab_size": 30527
23
+ }
setfit_engagement_bait/config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "5.3.0",
4
+ "transformers": "4.57.6",
5
+ "pytorch": "2.10.0+cpu"
6
+ },
7
+ "model_type": "SentenceTransformer",
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
setfit_engagement_bait/config_setfit.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "labels": null,
3
+ "normalize_embeddings": false
4
+ }
setfit_engagement_bait/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89716348b8b0105e8838a547f6a45b091697bd9ef70bef472e14361c753f767b
3
+ size 437967672
setfit_engagement_bait/model_head.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35d91fe08a147c561a79c0100b29170f012e7c79467abec89f2e659fc23b7ceb
3
+ size 7007
setfit_engagement_bait/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
setfit_engagement_bait/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 384,
3
+ "do_lower_case": false
4
+ }
setfit_engagement_bait/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "[UNK]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
setfit_engagement_bait/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
setfit_engagement_bait/tokenizer_config.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "104": {
36
+ "content": "[UNK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "30526": {
44
+ "content": "<mask>",
45
+ "lstrip": true,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "bos_token": "<s>",
53
+ "clean_up_tokenization_spaces": false,
54
+ "cls_token": "<s>",
55
+ "do_lower_case": true,
56
+ "eos_token": "</s>",
57
+ "extra_special_tokens": {},
58
+ "mask_token": "<mask>",
59
+ "max_length": 128,
60
+ "model_max_length": 384,
61
+ "pad_to_multiple_of": null,
62
+ "pad_token": "<pad>",
63
+ "pad_token_type_id": 0,
64
+ "padding_side": "right",
65
+ "sep_token": "</s>",
66
+ "stride": 0,
67
+ "strip_accents": null,
68
+ "tokenize_chinese_chars": true,
69
+ "tokenizer_class": "MPNetTokenizer",
70
+ "truncation_side": "right",
71
+ "truncation_strategy": "longest_first",
72
+ "unk_token": "[UNK]"
73
+ }
setfit_engagement_bait/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
setfit_epistemic_manipulation/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
setfit_epistemic_manipulation/README.md ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - setfit
4
+ - sentence-transformers
5
+ - text-classification
6
+ - generated_from_setfit_trainer
7
+ widget:
8
+ - text: 'Democrats two months ago: “Noncitizens don’t vote—or even *register* to vote.” Democrats
9
+ now: “We’re suing states that try to remove noncitizens from voter rolls.”'
10
+ - text: Who else wishes we could swear in Trump tomorrow?
11
+ - text: Should we take the remittance tax to 35%? https://t.co/rBKwjuBu1S
12
+ - text: Good idea, right? https://t.co/XyoWzUzT4a
13
+ - text: I’ve found another good defamation lawsuit for the estate of Charlie Kirk
14
+ to bring Charlie was *none* of the things this headline falsely accuses him of
15
+ being Who else finds this reprehensible? https://t.co/LdS3XqY9pL
16
+ metrics:
17
+ - accuracy
18
+ pipeline_tag: text-classification
19
+ library_name: setfit
20
+ inference: true
21
+ base_model: sentence-transformers/all-mpnet-base-v2
22
+ model-index:
23
+ - name: SetFit with sentence-transformers/all-mpnet-base-v2
24
+ results:
25
+ - task:
26
+ type: text-classification
27
+ name: Text Classification
28
+ dataset:
29
+ name: Unknown
30
+ type: unknown
31
+ split: test
32
+ metrics:
33
+ - type: accuracy
34
+ value: 0.8
35
+ name: Accuracy
36
+ ---
37
+
38
+ # SetFit with sentence-transformers/all-mpnet-base-v2
39
+
40
+ This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
41
+
42
+ The model has been trained using an efficient few-shot learning technique that involves:
43
+
44
+ 1. Fine-tuning a [Sentence Transformer](https://www.sbert.net) with contrastive learning.
45
+ 2. Training a classification head with features from the fine-tuned Sentence Transformer.
46
+
47
+ ## Model Details
48
+
49
+ ### Model Description
50
+ - **Model Type:** SetFit
51
+ - **Sentence Transformer body:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2)
52
+ - **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
53
+ - **Maximum Sequence Length:** 384 tokens
54
+ - **Number of Classes:** 2 classes
55
+ <!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
56
+ <!-- - **Language:** Unknown -->
57
+ <!-- - **License:** Unknown -->
58
+
59
+ ### Model Sources
60
+
61
+ - **Repository:** [SetFit on GitHub](https://github.com/huggingface/setfit)
62
+ - **Paper:** [Efficient Few-Shot Learning Without Prompts](https://arxiv.org/abs/2209.11055)
63
+ - **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)
64
+
65
+ ### Model Labels
66
+ | Label | Examples |
67
+ |:------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
68
+ | 1 | <ul><li>'RT @realDonaldTrump: Wow! York County, Pennsylvania, received THOUSANDS of potentially FRAUDULENT Voter Registration Forms and Mail-In Ball…'</li><li>'Only an enemy would do this to another country Only a traitor would do it to his own Share if you agree that there must be consequences for this betrayal https://t.co/BkqsRstugX'</li><li>'I hope @Kash_Patel fires the “Patriot Front” wing of the FBI before tomorrow morning https://t.co/hcyGgidDbu'</li></ul> |
69
+ | 0 | <ul><li>'Is @JDVance the GOAT Vice President of the United States? A—Yes B—No https://t.co/yoZH89jkpx'</li><li>'I’m grateful to @elonmusk for buying Twitter And turning it into X Raise your hand if you agree https://t.co/ToOjLY56xU'</li><li>'🚨 I’m thrilled to receive the news that Senator Susan Collins has announced her support for the SAVE America Act!🚨 This is huge! https://t.co/PxAvRD2C5N'</li></ul> |
70
+
71
+ ## Evaluation
72
+
73
+ ### Metrics
74
+ | Label | Accuracy |
75
+ |:--------|:---------|
76
+ | **all** | 0.8 |
77
+
78
+ ## Uses
79
+
80
+ ### Direct Use for Inference
81
+
82
+ First install the SetFit library:
83
+
84
+ ```bash
85
+ pip install setfit
86
+ ```
87
+
88
+ Then you can load this model and run inference.
89
+
90
+ ```python
91
+ from setfit import SetFitModel
92
+
93
+ # Download from the 🤗 Hub
94
+ model = SetFitModel.from_pretrained("setfit_model_id")
95
+ # Run inference
96
+ preds = model("Good idea, right? https://t.co/XyoWzUzT4a")
97
+ ```
98
+
99
+ <!--
100
+ ### Downstream Use
101
+
102
+ *List how someone could finetune this model on their own dataset.*
103
+ -->
104
+
105
+ <!--
106
+ ### Out-of-Scope Use
107
+
108
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
109
+ -->
110
+
111
+ <!--
112
+ ## Bias, Risks and Limitations
113
+
114
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
115
+ -->
116
+
117
+ <!--
118
+ ### Recommendations
119
+
120
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
121
+ -->
122
+
123
+ ## Training Details
124
+
125
+ ### Training Set Metrics
126
+ | Training set | Min | Mean | Max |
127
+ |:-------------|:----|:--------|:----|
128
+ | Word count | 2 | 25.1625 | 106 |
129
+
130
+ | Label | Training Sample Count |
131
+ |:------|:----------------------|
132
+ | 0 | 41 |
133
+ | 1 | 39 |
134
+
135
+ ### Training Hyperparameters
136
+ - batch_size: (16, 16)
137
+ - num_epochs: (1, 1)
138
+ - max_steps: -1
139
+ - sampling_strategy: oversampling
140
+ - num_iterations: 5
141
+ - body_learning_rate: (2e-05, 1e-05)
142
+ - head_learning_rate: 0.01
143
+ - loss: CosineSimilarityLoss
144
+ - distance_metric: cosine_distance
145
+ - margin: 0.25
146
+ - end_to_end: False
147
+ - use_amp: False
148
+ - warmup_proportion: 0.1
149
+ - l2_weight: 0.01
150
+ - seed: 42
151
+ - eval_max_steps: -1
152
+ - load_best_model_at_end: False
153
+
154
+ ### Training Results
155
+ | Epoch | Step | Training Loss | Validation Loss |
156
+ |:-----:|:----:|:-------------:|:---------------:|
157
+ | 0.02 | 1 | 0.3921 | - |
158
+ | 1.0 | 50 | 0.2153 | - |
159
+
160
+ ### Framework Versions
161
+ - Python: 3.12.3
162
+ - SetFit: 1.1.3
163
+ - Sentence Transformers: 5.3.0
164
+ - Transformers: 4.57.6
165
+ - PyTorch: 2.10.0+cpu
166
+ - Datasets: 4.8.3
167
+ - Tokenizers: 0.22.2
168
+
169
+ ## Citation
170
+
171
+ ### BibTeX
172
+ ```bibtex
173
+ @article{https://doi.org/10.48550/arxiv.2209.11055,
174
+ doi = {10.48550/ARXIV.2209.11055},
175
+ url = {https://arxiv.org/abs/2209.11055},
176
+ author = {Tunstall, Lewis and Reimers, Nils and Jo, Unso Eun Seo and Bates, Luke and Korat, Daniel and Wasserblat, Moshe and Pereg, Oren},
177
+ keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences},
178
+ title = {Efficient Few-Shot Learning Without Prompts},
179
+ publisher = {arXiv},
180
+ year = {2022},
181
+ copyright = {Creative Commons Attribution 4.0 International}
182
+ }
183
+ ```
184
+
185
+ <!--
186
+ ## Glossary
187
+
188
+ *Clearly define terms in order to be accessible across audiences.*
189
+ -->
190
+
191
+ <!--
192
+ ## Model Card Authors
193
+
194
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
195
+ -->
196
+
197
+ <!--
198
+ ## Model Card Contact
199
+
200
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
201
+ -->
setfit_epistemic_manipulation/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MPNetModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "dtype": "float32",
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 514,
16
+ "model_type": "mpnet",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 1,
20
+ "relative_attention_num_buckets": 32,
21
+ "transformers_version": "4.57.6",
22
+ "vocab_size": 30527
23
+ }
setfit_epistemic_manipulation/config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "5.3.0",
4
+ "transformers": "4.57.6",
5
+ "pytorch": "2.10.0+cpu"
6
+ },
7
+ "model_type": "SentenceTransformer",
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
setfit_epistemic_manipulation/config_setfit.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "normalize_embeddings": false,
3
+ "labels": null
4
+ }
setfit_epistemic_manipulation/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21af567e3db9aaa645421df60d72e2a000c6b8819ef16fdaf2fcab95f7f12780
3
+ size 437967672
setfit_epistemic_manipulation/model_head.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1eb09e0eb43d28b5b1b4e45484bc4f7362972da5087c339f2c1fd369598f52c4
3
+ size 7007
setfit_epistemic_manipulation/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
setfit_epistemic_manipulation/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 384,
3
+ "do_lower_case": false
4
+ }
setfit_epistemic_manipulation/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "[UNK]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
setfit_epistemic_manipulation/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
setfit_epistemic_manipulation/tokenizer_config.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "104": {
36
+ "content": "[UNK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "30526": {
44
+ "content": "<mask>",
45
+ "lstrip": true,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "bos_token": "<s>",
53
+ "clean_up_tokenization_spaces": false,
54
+ "cls_token": "<s>",
55
+ "do_lower_case": true,
56
+ "eos_token": "</s>",
57
+ "extra_special_tokens": {},
58
+ "mask_token": "<mask>",
59
+ "max_length": 128,
60
+ "model_max_length": 384,
61
+ "pad_to_multiple_of": null,
62
+ "pad_token": "<pad>",
63
+ "pad_token_type_id": 0,
64
+ "padding_side": "right",
65
+ "sep_token": "</s>",
66
+ "stride": 0,
67
+ "strip_accents": null,
68
+ "tokenize_chinese_chars": true,
69
+ "tokenizer_class": "MPNetTokenizer",
70
+ "truncation_side": "right",
71
+ "truncation_strategy": "longest_first",
72
+ "unk_token": "[UNK]"
73
+ }
setfit_epistemic_manipulation/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
setfit_performative_outrage/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
setfit_performative_outrage/README.md ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - setfit
4
+ - sentence-transformers
5
+ - text-classification
6
+ - generated_from_setfit_trainer
7
+ widget:
8
+ - text: President Erdogans free pass from the Trump White House to commit abuses has
9
+ officially expired. My legislation with @RonWyden and @SenJeffMerkley makes clear
10
+ that President Biden must use all diplomatic tools to hold the Erdogan government
11
+ accountable. https://t.co/ker7Y54wsz
12
+ - text: Every day reports of how the open border policy of Pres Biden is enriching
13
+ cartel bc no migrant can get in US w/o paying cartel All adding up to multi million
14
+ $$ every wk Open borders also national security issue when we arrest ppl on our
15
+ terrorist list
16
+ - text: '@SASCGOP Alabama wins in #FY22NDAA include funding authorizations for: hypersonic
17
+ development and testing at Huntsvilles Missile Defense Agency repair and maintenance
18
+ at Dannelly Field barracks renovations at Ft. Rucker Expeditionary Fast Transport
19
+ vessels to be built in Mobile'
20
+ - text: America is the land of the free because we are the home of the brave. Someone
21
+ please tell Biden what brave means.
22
+ - text: The radical left hates SCOTUS because it’s no longer willing to improperly
23
+ advance and protect the radical left’s legislative agenda.
24
+ metrics:
25
+ - accuracy
26
+ pipeline_tag: text-classification
27
+ library_name: setfit
28
+ inference: true
29
+ base_model: sentence-transformers/all-mpnet-base-v2
30
+ model-index:
31
+ - name: SetFit with sentence-transformers/all-mpnet-base-v2
32
+ results:
33
+ - task:
34
+ type: text-classification
35
+ name: Text Classification
36
+ dataset:
37
+ name: Unknown
38
+ type: unknown
39
+ split: test
40
+ metrics:
41
+ - type: accuracy
42
+ value: 0.85
43
+ name: Accuracy
44
+ ---
45
+
46
+ # SetFit with sentence-transformers/all-mpnet-base-v2
47
+
48
+ This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
49
+
50
+ The model has been trained using an efficient few-shot learning technique that involves:
51
+
52
+ 1. Fine-tuning a [Sentence Transformer](https://www.sbert.net) with contrastive learning.
53
+ 2. Training a classification head with features from the fine-tuned Sentence Transformer.
54
+
55
+ ## Model Details
56
+
57
+ ### Model Description
58
+ - **Model Type:** SetFit
59
+ - **Sentence Transformer body:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2)
60
+ - **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
61
+ - **Maximum Sequence Length:** 384 tokens
62
+ - **Number of Classes:** 2 classes
63
+ <!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
64
+ <!-- - **Language:** Unknown -->
65
+ <!-- - **License:** Unknown -->
66
+
67
+ ### Model Sources
68
+
69
+ - **Repository:** [SetFit on GitHub](https://github.com/huggingface/setfit)
70
+ - **Paper:** [Efficient Few-Shot Learning Without Prompts](https://arxiv.org/abs/2209.11055)
71
+ - **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)
72
+
73
+ ### Model Labels
74
+ | Label | Examples |
75
+ |:------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
76
+ | 0 | <ul><li>'@RyanGreenfield_ Folded napkin in lap.'</li><li>'As we reflect, we should acknowledge the dark underbelly of mistrust and fear that reared its ugly head, particularly against Muslim, Middle Eastern, North African, and wider South Asian communities.'</li><li>'Dems rammed thru $1.9T under guise of COVID relief when Republicans proposed providing same relief for fraction of cost. Biden now proposing bills for $4 trillion+ w a grab bag of items. Total Biden extra spending over X annual discretionary budget incl military, schools, etc'</li></ul> |
77
+ | 1 | <ul><li>'Later today I’ll be calling up the SAVE Act on the Senate floor, seeking immediate passage. It makes sense to oppose this bill only if you want noncitizens to vote.'</li><li>'@beaustin24 Make America Flat Again'</li><li>'Who else is frustrated that we still know almost *nothing* about either of the guys who tried to kill Trump last summer?'</li></ul> |
78
+
79
+ ## Evaluation
80
+
81
+ ### Metrics
82
+ | Label | Accuracy |
83
+ |:--------|:---------|
84
+ | **all** | 0.85 |
85
+
86
+ ## Uses
87
+
88
+ ### Direct Use for Inference
89
+
90
+ First install the SetFit library:
91
+
92
+ ```bash
93
+ pip install setfit
94
+ ```
95
+
96
+ Then you can load this model and run inference.
97
+
98
+ ```python
99
+ from setfit import SetFitModel
100
+
101
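+ # Download from the 🤗 Hub ("setfit_model_id" is a placeholder: substitute this model's Hub repo id or a local path to the model folder)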
102
+ model = SetFitModel.from_pretrained("setfit_model_id")
103
+ # Run inference
104
+ preds = model("America is the land of the free because we are the home of the brave. Someone please tell Biden what brave means.")
105
+ ```
106
+
107
+ <!--
108
+ ### Downstream Use
109
+
110
+ *List how someone could finetune this model on their own dataset.*
111
+ -->
112
+
113
+ <!--
114
+ ### Out-of-Scope Use
115
+
116
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
117
+ -->
118
+
119
+ <!--
120
+ ## Bias, Risks and Limitations
121
+
122
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
123
+ -->
124
+
125
+ <!--
126
+ ### Recommendations
127
+
128
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
129
+ -->
130
+
131
+ ## Training Details
132
+
133
+ ### Training Set Metrics
134
+ | Training set | Min | Mean    | Max |
135
+ |:-------------|:----|:--------|:----|
136
+ | Word count | 1 | 23.8813 | 58 |
137
+
138
+ | Label | Training Sample Count |
139
+ |:------|:----------------------|
140
+ | 0 | 158 |
141
+ | 1 | 162 |
142
+
143
+ ### Training Hyperparameters
144
+ - batch_size: (16, 16)
145
+ - num_epochs: (1, 1)
146
+ - max_steps: -1
147
+ - sampling_strategy: oversampling
148
+ - num_iterations: 5
149
+ - body_learning_rate: (2e-05, 1e-05)
150
+ - head_learning_rate: 0.01
151
+ - loss: CosineSimilarityLoss
152
+ - distance_metric: cosine_distance
153
+ - margin: 0.25
154
+ - end_to_end: False
155
+ - use_amp: False
156
+ - warmup_proportion: 0.1
157
+ - l2_weight: 0.01
158
+ - seed: 42
159
+ - eval_max_steps: -1
160
+ - load_best_model_at_end: False
161
+
162
+ ### Training Results
163
+ | Epoch | Step | Training Loss | Validation Loss |
164
+ |:-----:|:----:|:-------------:|:---------------:|
165
+ | 0.005 | 1 | 0.4729 | - |
166
+ | 0.25 | 50 | 0.2338 | - |
167
+ | 0.5 | 100 | 0.1118 | - |
168
+ | 0.75 | 150 | 0.0188 | - |
169
+ | 1.0 | 200 | 0.003 | - |
170
+
171
+ ### Framework Versions
172
+ - Python: 3.12.3
173
+ - SetFit: 1.1.3
174
+ - Sentence Transformers: 5.3.0
175
+ - Transformers: 4.57.6
176
+ - PyTorch: 2.10.0+cpu
177
+ - Datasets: 4.8.3
178
+ - Tokenizers: 0.22.2
179
+
180
+ ## Citation
181
+
182
+ ### BibTeX
183
+ ```bibtex
184
+ @article{https://doi.org/10.48550/arxiv.2209.11055,
185
+ doi = {10.48550/ARXIV.2209.11055},
186
+ url = {https://arxiv.org/abs/2209.11055},
187
+ author = {Tunstall, Lewis and Reimers, Nils and Jo, Unso Eun Seo and Bates, Luke and Korat, Daniel and Wasserblat, Moshe and Pereg, Oren},
188
+ keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences},
189
+ title = {Efficient Few-Shot Learning Without Prompts},
190
+ publisher = {arXiv},
191
+ year = {2022},
192
+ copyright = {Creative Commons Attribution 4.0 International}
193
+ }
194
+ ```
195
+
196
+ <!--
197
+ ## Glossary
198
+
199
+ *Clearly define terms in order to be accessible across audiences.*
200
+ -->
201
+
202
+ <!--
203
+ ## Model Card Authors
204
+
205
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
206
+ -->
207
+
208
+ <!--
209
+ ## Model Card Contact
210
+
211
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
212
+ -->
setfit_performative_outrage/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MPNetModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "dtype": "float32",
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 514,
16
+ "model_type": "mpnet",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 1,
20
+ "relative_attention_num_buckets": 32,
21
+ "transformers_version": "4.57.6",
22
+ "vocab_size": 30527
23
+ }
setfit_performative_outrage/config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "5.3.0",
4
+ "transformers": "4.57.6",
5
+ "pytorch": "2.10.0+cpu"
6
+ },
7
+ "model_type": "SentenceTransformer",
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
setfit_performative_outrage/config_setfit.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "labels": null,
3
+ "normalize_embeddings": false
4
+ }
setfit_performative_outrage/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9b270225a8e8e82b072cdd9727ea2be52e7ec26077cf774e165ce070e1bc397
3
+ size 437967672
setfit_performative_outrage/model_head.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20acfa973d206bea711e9b5b8d88fe657b355bedf0cd3248f5e6aa490660b1b1
3
+ size 7007
setfit_performative_outrage/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]