Spaces:
Runtime error
Runtime error
Commit · 77e86cf
1
Parent(s): 263ccb8
Upload 3 files
Browse files
- knn_model.joblib +2 -2
- nca_model.joblib +2 -2
- train_classifier.py +7 -2
knn_model.joblib
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ceaeb3f040b36abf9aadbd303aeb88f4745d1a41c620b803d6a56c3229e0dd1
|
| 3 |
+
size 5725078
|
nca_model.joblib
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5053da7e87086d6ec242c2ae73df5e0a299ab0cb2dc395c81f8cc625f84987a
|
| 3 |
+
size 57724892
|
train_classifier.py
CHANGED
|
@@ -55,7 +55,7 @@ word_vectorizer.fit(requiredText)
|
|
| 55 |
joblib.dump(word_vectorizer, 'tfidf_vectorizer.joblib')
|
| 56 |
WordFeatures = word_vectorizer.transform(requiredText)
|
| 57 |
|
| 58 |
-
nca = NeighborhoodComponentsAnalysis(n_components=
|
| 59 |
WordFeatures = nca.fit_transform(WordFeatures.toarray(), requiredTarget)
|
| 60 |
nca_filename = f'nca_model.joblib'
|
| 61 |
joblib.dump(nca, nca_filename)
|
|
@@ -72,6 +72,11 @@ print(X_test.shape)
|
|
| 72 |
# knn = KNeighborsClassifier()
|
| 73 |
# gs = GridSearchCV(estimator=knn, param_grid=param_grid, scoring="accuracy", verbose=1, cv=10, n_jobs=3)
|
| 74 |
# grid_search = gs.fit(X_train, y_train)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
# best_score = grid_search.best_score_
|
| 76 |
# best_parameters = grid_search.best_params_
|
| 77 |
# print("Best Score:", best_score)
|
|
@@ -80,7 +85,7 @@ print(X_test.shape)
|
|
| 80 |
knn = KNeighborsClassifier(n_neighbors=1,
|
| 81 |
metric='manhattan',
|
| 82 |
weights='uniform',
|
| 83 |
-
algorithm='
|
| 84 |
)
|
| 85 |
knn.fit(X_train, y_train)
|
| 86 |
|
|
|
|
| 55 |
joblib.dump(word_vectorizer, 'tfidf_vectorizer.joblib')
|
| 56 |
WordFeatures = word_vectorizer.transform(requiredText)
|
| 57 |
|
| 58 |
+
nca = NeighborhoodComponentsAnalysis(n_components=400, random_state=42)
|
| 59 |
WordFeatures = nca.fit_transform(WordFeatures.toarray(), requiredTarget)
|
| 60 |
nca_filename = f'nca_model.joblib'
|
| 61 |
joblib.dump(nca, nca_filename)
|
|
|
|
| 72 |
# knn = KNeighborsClassifier()
|
| 73 |
# gs = GridSearchCV(estimator=knn, param_grid=param_grid, scoring="accuracy", verbose=1, cv=10, n_jobs=3)
|
| 74 |
# grid_search = gs.fit(X_train, y_train)
|
| 75 |
+
# results_df = pd.DataFrame(grid_search.cv_results_)
|
| 76 |
+
# # results_df.to_excel('grid_search_results_with_nca_500.xlsx', index=False)
|
| 77 |
+
# # results_df.to_excel('grid_search_results_with_nca_400.xlsx', index=False)
|
| 78 |
+
# # results_df.to_excel('grid_search_results_with_nca_300.xlsx', index=False)
|
| 79 |
+
# # results_df.to_excel('grid_search_results_no_nca.xlsx', index=False)
|
| 80 |
# best_score = grid_search.best_score_
|
| 81 |
# best_parameters = grid_search.best_params_
|
| 82 |
# print("Best Score:", best_score)
|
|
|
|
| 85 |
knn = KNeighborsClassifier(n_neighbors=1,
|
| 86 |
metric='manhattan',
|
| 87 |
weights='uniform',
|
| 88 |
+
algorithm='kd_tree',
|
| 89 |
)
|
| 90 |
knn.fit(X_train, y_train)
|
| 91 |
|