kantundpeterpan committed on
Commit
bf34d20
·
verified ·
1 Parent(s): 05a7f14

push push push

Browse files
Files changed (2) hide show
  1. README.md +6 -6
  2. skops.yaml +52 -0
README.md CHANGED
@@ -32,16 +32,16 @@ Trained with a lot of care
32
  | Hyperparameter | Value |
33
  |-------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
34
  | memory | |
35
- | steps | [('lemmatizer', FunctionTransformer(func=<function lemmatize_X at 0x7f5c1a052ca0>)), ('tfidf', TfidfVectorizer(max_df=0.95, min_df=2,<br /> stop_words=['if', 'when', 'most', 'ourselves', 'your', 'having',<br /> "didn't", '@', "you've", 'hasn', 'at', "mightn't",<br /> "mustn't", 'these', "it's", 'our', 'had', 'll',<br /> 'too', 'this', 'by', 'it', 'further', 'wasn',<br /> 'before', 'all', '{', 'herself', 'other', 'above', ...],<br /> tokenizer=<function tokenize_quote at 0x7f5c1a09da60>)), ('rf', RandomForestClassifier())] |
36
  | transform_input | |
37
  | verbose | False |
38
- | lemmatizer | FunctionTransformer(func=<function lemmatize_X at 0x7f5c1a052ca0>) |
39
- | tfidf | TfidfVectorizer(max_df=0.95, min_df=2,<br /> stop_words=['if', 'when', 'most', 'ourselves', 'your', 'having',<br /> "didn't", '@', "you've", 'hasn', 'at', "mightn't",<br /> "mustn't", 'these', "it's", 'our', 'had', 'll',<br /> 'too', 'this', 'by', 'it', 'further', 'wasn',<br /> 'before', 'all', '{', 'herself', 'other', 'above', ...],<br /> tokenizer=<function tokenize_quote at 0x7f5c1a09da60>) |
40
  | rf | RandomForestClassifier() |
41
  | lemmatizer__accept_sparse | False |
42
  | lemmatizer__check_inverse | True |
43
  | lemmatizer__feature_names_out | |
44
- | lemmatizer__func | <function lemmatize_X at 0x7f5c1a052ca0> |
45
  | lemmatizer__inv_kw_args | |
46
  | lemmatizer__inverse_func | |
47
  | lemmatizer__kw_args | |
@@ -64,7 +64,7 @@ Trained with a lot of care
64
  | tfidf__strip_accents | |
65
  | tfidf__sublinear_tf | False |
66
  | tfidf__token_pattern | (?u)\b\w\w+\b |
67
- | tfidf__tokenizer | <function tokenize_quote at 0x7f5c1a09da60> |
68
  | tfidf__use_idf | True |
69
  | tfidf__vocabulary | |
70
  | rf__bootstrap | True |
@@ -168,7 +168,7 @@ div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,
168
  #sk-container-id-1 a.estimator_doc_link:hover {/* unfitted */background-color: var(--sklearn-color-unfitted-level-3);color: var(--sklearn-color-background);text-decoration: none;
169
  }#sk-container-id-1 a.estimator_doc_link.fitted:hover {/* fitted */background-color: var(--sklearn-color-fitted-level-3);
170
  }
171
- </style><div id="sk-container-id-1" class="sk-top-container" style="overflow: auto;"><div class="sk-text-repr-fallback"><pre>Pipeline(steps=[(&#x27;lemmatizer&#x27;,FunctionTransformer(func=&lt;function lemmatize_X at 0x7f5c1a052ca0&gt;)),(&#x27;tfidf&#x27;,TfidfVectorizer(max_df=0.95, min_df=2,stop_words=[&#x27;if&#x27;, &#x27;when&#x27;, &#x27;most&#x27;, &#x27;ourselves&#x27;,&#x27;your&#x27;, &#x27;having&#x27;, &quot;didn&#x27;t&quot;, &#x27;@&#x27;,&quot;you&#x27;ve&quot;, &#x27;hasn&#x27;, &#x27;at&#x27;, &quot;mightn&#x27;t&quot;,&quot;mustn&#x27;t&quot;, &#x27;these&#x27;, &quot;it&#x27;s&quot;, &#x27;our&#x27;,&#x27;had&#x27;, &#x27;ll&#x27;, &#x27;too&#x27;, &#x27;this&#x27;, &#x27;by&#x27;,&#x27;it&#x27;, &#x27;further&#x27;, &#x27;wasn&#x27;, &#x27;before&#x27;,&#x27;all&#x27;, &#x27;{&#x27;, &#x27;herself&#x27;, &#x27;other&#x27;,&#x27;above&#x27;, ...],tokenizer=&lt;function tokenize_quote at 0x7f5c1a09da60&gt;)),(&#x27;rf&#x27;, RandomForestClassifier())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class="sk-container" hidden><div class="sk-item sk-dashed-wrapped"><div class="sk-label-container"><div class="sk-label fitted sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-1" type="checkbox" ><label for="sk-estimator-id-1" class="sk-toggleable__label fitted sk-toggleable__label-arrow"><div><div>Pipeline</div></div><div><span class="sk-estimator-doc-link fitted">i<span>Fitted</span></span></div></label><div class="sk-toggleable__content fitted"><pre>Pipeline(steps=[(&#x27;lemmatizer&#x27;,FunctionTransformer(func=&lt;function lemmatize_X at 0x7f5c1a052ca0&gt;)),(&#x27;tfidf&#x27;,TfidfVectorizer(max_df=0.95, min_df=2,stop_words=[&#x27;if&#x27;, &#x27;when&#x27;, &#x27;most&#x27;, &#x27;ourselves&#x27;,&#x27;your&#x27;, &#x27;having&#x27;, &quot;didn&#x27;t&quot;, &#x27;@&#x27;,&quot;you&#x27;ve&quot;, &#x27;hasn&#x27;, &#x27;at&#x27;, &quot;mightn&#x27;t&quot;,&quot;mustn&#x27;t&quot;, &#x27;these&#x27;, &quot;it&#x27;s&quot;, &#x27;our&#x27;,&#x27;had&#x27;, &#x27;ll&#x27;, &#x27;too&#x27;, &#x27;this&#x27;, &#x27;by&#x27;,&#x27;it&#x27;, &#x27;further&#x27;, &#x27;wasn&#x27;, &#x27;before&#x27;,&#x27;all&#x27;, &#x27;{&#x27;, &#x27;herself&#x27;, &#x27;other&#x27;,&#x27;above&#x27;, ...],tokenizer=&lt;function tokenize_quote at 0x7f5c1a09da60&gt;)),(&#x27;rf&#x27;, RandomForestClassifier())])</pre></div> </div></div><div class="sk-serial"><div class="sk-item"><div class="sk-estimator fitted sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-2" type="checkbox" ><label for="sk-estimator-id-2" class="sk-toggleable__label fitted sk-toggleable__label-arrow"><div><div>lemmatize_X</div><div class="caption">FunctionTransformer</div></div><div><a class="sk-estimator-doc-link fitted" rel="noreferrer" target="_blank" 
href="https://scikit-learn.org/1.6/modules/generated/sklearn.preprocessing.FunctionTransformer.html">?<span>Documentation for FunctionTransformer</span></a></div></label><div class="sk-toggleable__content fitted"><pre>FunctionTransformer(func=&lt;function lemmatize_X at 0x7f5c1a052ca0&gt;)</pre></div> </div></div><div class="sk-item"><div class="sk-estimator fitted sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-3" type="checkbox" ><label for="sk-estimator-id-3" class="sk-toggleable__label fitted sk-toggleable__label-arrow"><div><div>TfidfVectorizer</div></div><div><a class="sk-estimator-doc-link fitted" rel="noreferrer" target="_blank" href="https://scikit-learn.org/1.6/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html">?<span>Documentation for TfidfVectorizer</span></a></div></label><div class="sk-toggleable__content fitted"><pre>TfidfVectorizer(max_df=0.95, min_df=2,stop_words=[&#x27;if&#x27;, &#x27;when&#x27;, &#x27;most&#x27;, &#x27;ourselves&#x27;, &#x27;your&#x27;, &#x27;having&#x27;,&quot;didn&#x27;t&quot;, &#x27;@&#x27;, &quot;you&#x27;ve&quot;, &#x27;hasn&#x27;, &#x27;at&#x27;, &quot;mightn&#x27;t&quot;,&quot;mustn&#x27;t&quot;, &#x27;these&#x27;, &quot;it&#x27;s&quot;, &#x27;our&#x27;, &#x27;had&#x27;, &#x27;ll&#x27;,&#x27;too&#x27;, &#x27;this&#x27;, &#x27;by&#x27;, &#x27;it&#x27;, &#x27;further&#x27;, &#x27;wasn&#x27;,&#x27;before&#x27;, &#x27;all&#x27;, &#x27;{&#x27;, &#x27;herself&#x27;, &#x27;other&#x27;, &#x27;above&#x27;, ...],tokenizer=&lt;function tokenize_quote at 0x7f5c1a09da60&gt;)</pre></div> </div></div><div class="sk-item"><div class="sk-estimator fitted sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-4" type="checkbox" ><label for="sk-estimator-id-4" class="sk-toggleable__label fitted sk-toggleable__label-arrow"><div><div>RandomForestClassifier</div></div><div><a class="sk-estimator-doc-link fitted" rel="noreferrer" 
target="_blank" href="https://scikit-learn.org/1.6/modules/generated/sklearn.ensemble.RandomForestClassifier.html">?<span>Documentation for RandomForestClassifier</span></a></div></label><div class="sk-toggleable__content fitted"><pre>RandomForestClassifier()</pre></div> </div></div></div></div></div></div>
172
 
173
  ## Evaluation Results
174
 
 
32
  | Hyperparameter | Value |
33
  |-------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
34
  | memory | |
35
+ | steps | [('lemmatizer', FunctionTransformer(func=<function lemmatize_X at 0x7f79b376cca0>)), ('tfidf', TfidfVectorizer(max_df=0.95, min_df=2,<br /> stop_words=['if', 'when', 'most', 'ourselves', 'your', 'having',<br /> "didn't", '@', "you've", 'hasn', 'at', "mightn't",<br /> "mustn't", 'these', "it's", 'our', 'had', 'll',<br /> 'too', 'this', 'by', 'it', 'further', 'wasn',<br /> 'before', 'all', '{', 'herself', 'other', 'above', ...],<br /> tokenizer=<function tokenize_quote at 0x7f79b37b1a60>)), ('rf', RandomForestClassifier())] |
36
  | transform_input | |
37
  | verbose | False |
38
+ | lemmatizer | FunctionTransformer(func=<function lemmatize_X at 0x7f79b376cca0>) |
39
+ | tfidf | TfidfVectorizer(max_df=0.95, min_df=2,<br /> stop_words=['if', 'when', 'most', 'ourselves', 'your', 'having',<br /> "didn't", '@', "you've", 'hasn', 'at', "mightn't",<br /> "mustn't", 'these', "it's", 'our', 'had', 'll',<br /> 'too', 'this', 'by', 'it', 'further', 'wasn',<br /> 'before', 'all', '{', 'herself', 'other', 'above', ...],<br /> tokenizer=<function tokenize_quote at 0x7f79b37b1a60>) |
40
  | rf | RandomForestClassifier() |
41
  | lemmatizer__accept_sparse | False |
42
  | lemmatizer__check_inverse | True |
43
  | lemmatizer__feature_names_out | |
44
+ | lemmatizer__func | <function lemmatize_X at 0x7f79b376cca0> |
45
  | lemmatizer__inv_kw_args | |
46
  | lemmatizer__inverse_func | |
47
  | lemmatizer__kw_args | |
 
64
  | tfidf__strip_accents | |
65
  | tfidf__sublinear_tf | False |
66
  | tfidf__token_pattern | (?u)\b\w\w+\b |
67
+ | tfidf__tokenizer | <function tokenize_quote at 0x7f79b37b1a60> |
68
  | tfidf__use_idf | True |
69
  | tfidf__vocabulary | |
70
  | rf__bootstrap | True |
 
168
  #sk-container-id-1 a.estimator_doc_link:hover {/* unfitted */background-color: var(--sklearn-color-unfitted-level-3);color: var(--sklearn-color-background);text-decoration: none;
169
  }#sk-container-id-1 a.estimator_doc_link.fitted:hover {/* fitted */background-color: var(--sklearn-color-fitted-level-3);
170
  }
171
+ </style><div id="sk-container-id-1" class="sk-top-container" style="overflow: auto;"><div class="sk-text-repr-fallback"><pre>Pipeline(steps=[(&#x27;lemmatizer&#x27;,FunctionTransformer(func=&lt;function lemmatize_X at 0x7f79b376cca0&gt;)),(&#x27;tfidf&#x27;,TfidfVectorizer(max_df=0.95, min_df=2,stop_words=[&#x27;if&#x27;, &#x27;when&#x27;, &#x27;most&#x27;, &#x27;ourselves&#x27;,&#x27;your&#x27;, &#x27;having&#x27;, &quot;didn&#x27;t&quot;, &#x27;@&#x27;,&quot;you&#x27;ve&quot;, &#x27;hasn&#x27;, &#x27;at&#x27;, &quot;mightn&#x27;t&quot;,&quot;mustn&#x27;t&quot;, &#x27;these&#x27;, &quot;it&#x27;s&quot;, &#x27;our&#x27;,&#x27;had&#x27;, &#x27;ll&#x27;, &#x27;too&#x27;, &#x27;this&#x27;, &#x27;by&#x27;,&#x27;it&#x27;, &#x27;further&#x27;, &#x27;wasn&#x27;, &#x27;before&#x27;,&#x27;all&#x27;, &#x27;{&#x27;, &#x27;herself&#x27;, &#x27;other&#x27;,&#x27;above&#x27;, ...],tokenizer=&lt;function tokenize_quote at 0x7f79b37b1a60&gt;)),(&#x27;rf&#x27;, RandomForestClassifier())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class="sk-container" hidden><div class="sk-item sk-dashed-wrapped"><div class="sk-label-container"><div class="sk-label fitted sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-1" type="checkbox" ><label for="sk-estimator-id-1" class="sk-toggleable__label fitted sk-toggleable__label-arrow"><div><div>Pipeline</div></div><div><span class="sk-estimator-doc-link fitted">i<span>Fitted</span></span></div></label><div class="sk-toggleable__content fitted"><pre>Pipeline(steps=[(&#x27;lemmatizer&#x27;,FunctionTransformer(func=&lt;function lemmatize_X at 0x7f79b376cca0&gt;)),(&#x27;tfidf&#x27;,TfidfVectorizer(max_df=0.95, min_df=2,stop_words=[&#x27;if&#x27;, &#x27;when&#x27;, &#x27;most&#x27;, &#x27;ourselves&#x27;,&#x27;your&#x27;, &#x27;having&#x27;, &quot;didn&#x27;t&quot;, &#x27;@&#x27;,&quot;you&#x27;ve&quot;, &#x27;hasn&#x27;, &#x27;at&#x27;, &quot;mightn&#x27;t&quot;,&quot;mustn&#x27;t&quot;, &#x27;these&#x27;, &quot;it&#x27;s&quot;, &#x27;our&#x27;,&#x27;had&#x27;, &#x27;ll&#x27;, &#x27;too&#x27;, &#x27;this&#x27;, &#x27;by&#x27;,&#x27;it&#x27;, &#x27;further&#x27;, &#x27;wasn&#x27;, &#x27;before&#x27;,&#x27;all&#x27;, &#x27;{&#x27;, &#x27;herself&#x27;, &#x27;other&#x27;,&#x27;above&#x27;, ...],tokenizer=&lt;function tokenize_quote at 0x7f79b37b1a60&gt;)),(&#x27;rf&#x27;, RandomForestClassifier())])</pre></div> </div></div><div class="sk-serial"><div class="sk-item"><div class="sk-estimator fitted sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-2" type="checkbox" ><label for="sk-estimator-id-2" class="sk-toggleable__label fitted sk-toggleable__label-arrow"><div><div>lemmatize_X</div><div class="caption">FunctionTransformer</div></div><div><a class="sk-estimator-doc-link fitted" rel="noreferrer" target="_blank" 
href="https://scikit-learn.org/1.6/modules/generated/sklearn.preprocessing.FunctionTransformer.html">?<span>Documentation for FunctionTransformer</span></a></div></label><div class="sk-toggleable__content fitted"><pre>FunctionTransformer(func=&lt;function lemmatize_X at 0x7f79b376cca0&gt;)</pre></div> </div></div><div class="sk-item"><div class="sk-estimator fitted sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-3" type="checkbox" ><label for="sk-estimator-id-3" class="sk-toggleable__label fitted sk-toggleable__label-arrow"><div><div>TfidfVectorizer</div></div><div><a class="sk-estimator-doc-link fitted" rel="noreferrer" target="_blank" href="https://scikit-learn.org/1.6/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html">?<span>Documentation for TfidfVectorizer</span></a></div></label><div class="sk-toggleable__content fitted"><pre>TfidfVectorizer(max_df=0.95, min_df=2,stop_words=[&#x27;if&#x27;, &#x27;when&#x27;, &#x27;most&#x27;, &#x27;ourselves&#x27;, &#x27;your&#x27;, &#x27;having&#x27;,&quot;didn&#x27;t&quot;, &#x27;@&#x27;, &quot;you&#x27;ve&quot;, &#x27;hasn&#x27;, &#x27;at&#x27;, &quot;mightn&#x27;t&quot;,&quot;mustn&#x27;t&quot;, &#x27;these&#x27;, &quot;it&#x27;s&quot;, &#x27;our&#x27;, &#x27;had&#x27;, &#x27;ll&#x27;,&#x27;too&#x27;, &#x27;this&#x27;, &#x27;by&#x27;, &#x27;it&#x27;, &#x27;further&#x27;, &#x27;wasn&#x27;,&#x27;before&#x27;, &#x27;all&#x27;, &#x27;{&#x27;, &#x27;herself&#x27;, &#x27;other&#x27;, &#x27;above&#x27;, ...],tokenizer=&lt;function tokenize_quote at 0x7f79b37b1a60&gt;)</pre></div> </div></div><div class="sk-item"><div class="sk-estimator fitted sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-4" type="checkbox" ><label for="sk-estimator-id-4" class="sk-toggleable__label fitted sk-toggleable__label-arrow"><div><div>RandomForestClassifier</div></div><div><a class="sk-estimator-doc-link fitted" rel="noreferrer" 
target="_blank" href="https://scikit-learn.org/1.6/modules/generated/sklearn.ensemble.RandomForestClassifier.html">?<span>Documentation for RandomForestClassifier</span></a></div></label><div class="sk-toggleable__content fitted"><pre>RandomForestClassifier()</pre></div> </div></div></div></div></div></div>
172
 
173
  ## Evaluation Results
174
 
skops.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hf_repo: kantundpeterpan/skopush-test
2
+
3
+ local_repo:
4
+ name: tmp
5
+ init: True
6
+
7
+ model_path: tfidf_rf.skops
8
+
9
+ dataset:
10
+ name: "QuotaClimat/frugalaichallenge-text-train"
11
+ source: datasets
12
+ target_col: label
13
+ evaluate_on: test
14
+
15
+ model_deps: # imported dynamically before loading the model, and added to the repo
16
+ - tools.py
17
+
18
+ deps: # import dynamically and write versions to repo init method
19
+ - scikit-learn:sklearn
20
+ - nltk:nltk
21
+
22
+ model_card:
23
+ filename: README.md
24
+ task: text-classification
25
+ description:
26
+ main: |
27
+ This model is an attempt to solve the 2025 FrugalAI challenge.
28
+ *Nice*.
29
+ Intended uses & limitations: |
30
+ Better than random label assignment, still room for improvement.
31
+ Training Procedure: |
32
+ Trained with a lot of care
33
+ sections:
34
+ A lot of info: |
35
+ Does this work?
36
+ metrics:
37
+ sklearn: # module name?
38
+ - accuracy:accuracy_score(normalize=True)
39
+ - f1_score:f1_score(average="macro")
40
+ tools:
41
+ - super_config:test_scorer(blubb=2)
42
+ confusion_matrix:
43
+ title: "Confusion Matrix"
44
+ filename: "confusion_matrix.png"
45
+ # labels: model_classes
46
+ plt:
47
+ xticks:
48
+ - rotation=90
49
+
50
+ push:
51
+ commit_message: "push push push"
52
+ create_remote: True