push push push
Browse files- README.md +6 -6
- skops.yaml +52 -0
README.md
CHANGED
|
@@ -32,16 +32,16 @@ Trained with a lot of care
|
|
| 32 |
| Hyperparameter | Value |
|
| 33 |
|-------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 34 |
| memory | |
|
| 35 |
-
| steps | [('lemmatizer', FunctionTransformer(func=<function lemmatize_X at
|
| 36 |
| transform_input | |
|
| 37 |
| verbose | False |
|
| 38 |
-
| lemmatizer | FunctionTransformer(func=<function lemmatize_X at
|
| 39 |
-
| tfidf | TfidfVectorizer(max_df=0.95, min_df=2,<br /> stop_words=['if', 'when', 'most', 'ourselves', 'your', 'having',<br /> "didn't", '@', "you've", 'hasn', 'at', "mightn't",<br /> "mustn't", 'these', "it's", 'our', 'had', 'll',<br /> 'too', 'this', 'by', 'it', 'further', 'wasn',<br /> 'before', 'all', '{', 'herself', 'other', 'above', ...],<br /> tokenizer=<function tokenize_quote at
|
| 40 |
| rf | RandomForestClassifier() |
|
| 41 |
| lemmatizer__accept_sparse | False |
|
| 42 |
| lemmatizer__check_inverse | True |
|
| 43 |
| lemmatizer__feature_names_out | |
|
| 44 |
-
| lemmatizer__func | <function lemmatize_X at
|
| 45 |
| lemmatizer__inv_kw_args | |
|
| 46 |
| lemmatizer__inverse_func | |
|
| 47 |
| lemmatizer__kw_args | |
|
|
@@ -64,7 +64,7 @@ Trained with a lot of care
|
|
| 64 |
| tfidf__strip_accents | |
|
| 65 |
| tfidf__sublinear_tf | False |
|
| 66 |
| tfidf__token_pattern | (?u)\b\w\w+\b |
|
| 67 |
-
| tfidf__tokenizer | <function tokenize_quote at
|
| 68 |
| tfidf__use_idf | True |
|
| 69 |
| tfidf__vocabulary | |
|
| 70 |
| rf__bootstrap | True |
|
|
@@ -168,7 +168,7 @@ div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,
|
|
| 168 |
#sk-container-id-1 a.estimator_doc_link:hover {/* unfitted */background-color: var(--sklearn-color-unfitted-level-3);color: var(--sklearn-color-background);text-decoration: none;
|
| 169 |
}#sk-container-id-1 a.estimator_doc_link.fitted:hover {/* fitted */background-color: var(--sklearn-color-fitted-level-3);
|
| 170 |
}
|
| 171 |
-
</style><div id="sk-container-id-1" class="sk-top-container" style="overflow: auto;"><div class="sk-text-repr-fallback"><pre>Pipeline(steps=[('lemmatizer',FunctionTransformer(func=<function lemmatize_X at
|
| 172 |
|
| 173 |
## Evaluation Results
|
| 174 |
|
|
|
|
| 32 |
| Hyperparameter | Value |
|
| 33 |
|-------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 34 |
| memory | |
|
| 35 |
+
| steps | [('lemmatizer', FunctionTransformer(func=<function lemmatize_X at 0x7f79b376cca0>)), ('tfidf', TfidfVectorizer(max_df=0.95, min_df=2,<br /> stop_words=['if', 'when', 'most', 'ourselves', 'your', 'having',<br /> "didn't", '@', "you've", 'hasn', 'at', "mightn't",<br /> "mustn't", 'these', "it's", 'our', 'had', 'll',<br /> 'too', 'this', 'by', 'it', 'further', 'wasn',<br /> 'before', 'all', '{', 'herself', 'other', 'above', ...],<br /> tokenizer=<function tokenize_quote at 0x7f79b37b1a60>)), ('rf', RandomForestClassifier())] |
|
| 36 |
| transform_input | |
|
| 37 |
| verbose | False |
|
| 38 |
+
| lemmatizer | FunctionTransformer(func=<function lemmatize_X at 0x7f79b376cca0>) |
|
| 39 |
+
| tfidf | TfidfVectorizer(max_df=0.95, min_df=2,<br /> stop_words=['if', 'when', 'most', 'ourselves', 'your', 'having',<br /> "didn't", '@', "you've", 'hasn', 'at', "mightn't",<br /> "mustn't", 'these', "it's", 'our', 'had', 'll',<br /> 'too', 'this', 'by', 'it', 'further', 'wasn',<br /> 'before', 'all', '{', 'herself', 'other', 'above', ...],<br /> tokenizer=<function tokenize_quote at 0x7f79b37b1a60>) |
|
| 40 |
| rf | RandomForestClassifier() |
|
| 41 |
| lemmatizer__accept_sparse | False |
|
| 42 |
| lemmatizer__check_inverse | True |
|
| 43 |
| lemmatizer__feature_names_out | |
|
| 44 |
+
| lemmatizer__func | <function lemmatize_X at 0x7f79b376cca0> |
|
| 45 |
| lemmatizer__inv_kw_args | |
|
| 46 |
| lemmatizer__inverse_func | |
|
| 47 |
| lemmatizer__kw_args | |
|
|
|
|
| 64 |
| tfidf__strip_accents | |
|
| 65 |
| tfidf__sublinear_tf | False |
|
| 66 |
| tfidf__token_pattern | (?u)\b\w\w+\b |
|
| 67 |
+
| tfidf__tokenizer | <function tokenize_quote at 0x7f79b37b1a60> |
|
| 68 |
| tfidf__use_idf | True |
|
| 69 |
| tfidf__vocabulary | |
|
| 70 |
| rf__bootstrap | True |
|
|
|
|
| 168 |
#sk-container-id-1 a.estimator_doc_link:hover {/* unfitted */background-color: var(--sklearn-color-unfitted-level-3);color: var(--sklearn-color-background);text-decoration: none;
|
| 169 |
}#sk-container-id-1 a.estimator_doc_link.fitted:hover {/* fitted */background-color: var(--sklearn-color-fitted-level-3);
|
| 170 |
}
|
| 171 |
+
</style><div id="sk-container-id-1" class="sk-top-container" style="overflow: auto;"><div class="sk-text-repr-fallback"><pre>Pipeline(steps=[('lemmatizer',FunctionTransformer(func=<function lemmatize_X at 0x7f79b376cca0>)),('tfidf',TfidfVectorizer(max_df=0.95, min_df=2,stop_words=['if', 'when', 'most', 'ourselves','your', 'having', "didn't", '@',"you've", 'hasn', 'at', "mightn't","mustn't", 'these', "it's", 'our','had', 'll', 'too', 'this', 'by','it', 'further', 'wasn', 'before','all', '{', 'herself', 'other','above', ...],tokenizer=<function tokenize_quote at 0x7f79b37b1a60>)),('rf', RandomForestClassifier())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class="sk-container" hidden><div class="sk-item sk-dashed-wrapped"><div class="sk-label-container"><div class="sk-label fitted sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-1" type="checkbox" ><label for="sk-estimator-id-1" class="sk-toggleable__label fitted sk-toggleable__label-arrow"><div><div>Pipeline</div></div><div><span class="sk-estimator-doc-link fitted">i<span>Fitted</span></span></div></label><div class="sk-toggleable__content fitted"><pre>Pipeline(steps=[('lemmatizer',FunctionTransformer(func=<function lemmatize_X at 0x7f79b376cca0>)),('tfidf',TfidfVectorizer(max_df=0.95, min_df=2,stop_words=['if', 'when', 'most', 'ourselves','your', 'having', "didn't", '@',"you've", 'hasn', 'at', "mightn't","mustn't", 'these', "it's", 'our','had', 'll', 'too', 'this', 'by','it', 'further', 'wasn', 'before','all', '{', 'herself', 'other','above', ...],tokenizer=<function tokenize_quote at 0x7f79b37b1a60>)),('rf', RandomForestClassifier())])</pre></div> </div></div><div class="sk-serial"><div class="sk-item"><div class="sk-estimator fitted sk-toggleable"><input 
class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-2" type="checkbox" ><label for="sk-estimator-id-2" class="sk-toggleable__label fitted sk-toggleable__label-arrow"><div><div>lemmatize_X</div><div class="caption">FunctionTransformer</div></div><div><a class="sk-estimator-doc-link fitted" rel="noreferrer" target="_blank" href="https://scikit-learn.org/1.6/modules/generated/sklearn.preprocessing.FunctionTransformer.html">?<span>Documentation for FunctionTransformer</span></a></div></label><div class="sk-toggleable__content fitted"><pre>FunctionTransformer(func=<function lemmatize_X at 0x7f79b376cca0>)</pre></div> </div></div><div class="sk-item"><div class="sk-estimator fitted sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-3" type="checkbox" ><label for="sk-estimator-id-3" class="sk-toggleable__label fitted sk-toggleable__label-arrow"><div><div>TfidfVectorizer</div></div><div><a class="sk-estimator-doc-link fitted" rel="noreferrer" target="_blank" href="https://scikit-learn.org/1.6/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html">?<span>Documentation for TfidfVectorizer</span></a></div></label><div class="sk-toggleable__content fitted"><pre>TfidfVectorizer(max_df=0.95, min_df=2,stop_words=['if', 'when', 'most', 'ourselves', 'your', 'having',"didn't", '@', "you've", 'hasn', 'at', "mightn't","mustn't", 'these', "it's", 'our', 'had', 'll','too', 'this', 'by', 'it', 'further', 'wasn','before', 'all', '{', 'herself', 'other', 'above', ...],tokenizer=<function tokenize_quote at 0x7f79b37b1a60>)</pre></div> </div></div><div class="sk-item"><div class="sk-estimator fitted sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-4" type="checkbox" ><label for="sk-estimator-id-4" class="sk-toggleable__label fitted sk-toggleable__label-arrow"><div><div>RandomForestClassifier</div></div><div><a class="sk-estimator-doc-link fitted" rel="noreferrer" 
target="_blank" href="https://scikit-learn.org/1.6/modules/generated/sklearn.ensemble.RandomForestClassifier.html">?<span>Documentation for RandomForestClassifier</span></a></div></label><div class="sk-toggleable__content fitted"><pre>RandomForestClassifier()</pre></div> </div></div></div></div></div></div>
|
| 172 |
|
| 173 |
## Evaluation Results
|
| 174 |
|
skops.yaml
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
hf_repo: kantundpeterpan/skopush-test
|
| 2 |
+
|
| 3 |
+
local_repo:
|
| 4 |
+
name: tmp
|
| 5 |
+
init: True
|
| 6 |
+
|
| 7 |
+
model_path: tfidf_rf.skops
|
| 8 |
+
|
| 9 |
+
dataset:
|
| 10 |
+
name: "QuotaClimat/frugalaichallenge-text-train"
|
| 11 |
+
source: datasets
|
| 12 |
+
target_col: label
|
| 13 |
+
evaluate_on: test
|
| 14 |
+
|
| 15 |
+
model_deps: # files imported dynamically before the model is loaded; also added to the repo
|
| 16 |
+
- tools.py
|
| 17 |
+
|
| 18 |
+
deps: # packages imported dynamically; their versions are written via the repo init method
|
| 19 |
+
- scikit-learn:sklearn
|
| 20 |
+
- nltk:nltk
|
| 21 |
+
|
| 22 |
+
model_card:
|
| 23 |
+
filename: README.md
|
| 24 |
+
task: text-classification
|
| 25 |
+
description:
|
| 26 |
+
main: |
|
| 27 |
+
This model is an attempt to solve the 2025 FrugalAI challenge.
|
| 28 |
+
*Nice*.
|
| 29 |
+
Intended uses & limitations: |
|
| 30 |
+
Better than random label assignment, still room for improvement.
|
| 31 |
+
Training Procedure: |
|
| 32 |
+
Trained with a lot of care
|
| 33 |
+
sections:
|
| 34 |
+
A lot of info: |
|
| 35 |
+
Does this work?
|
| 36 |
+
metrics:
|
| 37 |
+
sklearn: # module name?
|
| 38 |
+
- accuracy:accuracy_score(normalize=True)
|
| 39 |
+
- f1_score:f1_score(average="macro")
|
| 40 |
+
tools:
|
| 41 |
+
- super_config:test_scorer(blubb=2)
|
| 42 |
+
confusion_matrix:
|
| 43 |
+
title: "Confusion Matrix"
|
| 44 |
+
filename: "confusion_matrix.png"
|
| 45 |
+
# labels: model_classes
|
| 46 |
+
plt:
|
| 47 |
+
xticks:
|
| 48 |
+
- rotation=90
|
| 49 |
+
|
| 50 |
+
push:
|
| 51 |
+
commit_message: "push push push"
|
| 52 |
+
create_remote: True
|