f64 committed on
Commit
74fa2db
·
1 Parent(s): 2438632
CTAPT.py CHANGED
@@ -4,8 +4,11 @@ import streamlit as st, pandas as pd, numpy as np
4
  st.set_page_config(page_title="Предсказание V", page_icon="🦋", layout="wide", initial_sidebar_state="expanded")
5
  # set_page_config() can only be called once per app page, and must be called as the first Streamlit command in your script.
6
 
7
- import my_static_methods as my_stm
8
- st.html(my_stm.STYLE_CORRECTION)
 
 
 
9
 
10
  st.sidebar.markdown("💎 Стартовая страница")
11
 
 
4
  st.set_page_config(page_title="Предсказание V", page_icon="🦋", layout="wide", initial_sidebar_state="expanded")
5
  # set_page_config() can only be called once per app page, and must be called as the first Streamlit command in your script.
6
 
7
+ import my_static_tools as mst
8
+ from my_static_tools import HF_tools as hft
9
+ from my_static_tools import XYZV_tools as xvt
10
+
11
+ st.html(mst.STYLE_CORRECTION)
12
 
13
  st.sidebar.markdown("💎 Стартовая страница")
14
 
my_static_methods.py DELETED
@@ -1,195 +0,0 @@
1
- from typing import Union, NamedTuple
2
- import io,os,re,sys,math,time,uuid,ctypes,pickle,random,shutil,string,urllib,decimal,datetime,itertools,traceback,collections,statistics
3
- import numpy as np, pandas as pd
4
- import plotly.express as px
5
- import huggingface_hub
6
-
7
- import sklearn #, statsmodels
8
- from sklearn import svm, neighbors, naive_bayes, neural_network, tree, ensemble, linear_model, discriminant_analysis, gaussian_process, manifold, cluster
9
- #from statsmodels.tsa import seasonal
10
-
11
- os.makedirs(".temp", exist_ok=True) # for temporary local files
12
-
13
- class HfRepo(NamedTuple):
14
- repo_id: str
15
- repo_type: str
16
- token: str
17
-
18
-
19
- ### remove decoration and popup menu button at top
20
- STYLE_CORRECTION = " ".join([
21
- "<style>",
22
- "header[data-testid='stHeader'] { display:none }",
23
- "div[data-testid='stSidebarHeader'] { display:none }",
24
- "div[data-testid='stAppViewBlockContainer'] { padding:1em }",
25
- "div[data-testid='collapsedControl'] { background-color:#EEE }",
26
- "a[href='https://streamlit.io/cloud'] { display:none }"
27
- "</style>"
28
- ])
29
-
30
- ###
31
- def pandas_info(df: pd.DataFrame) -> Union[pd.DataFrame,str]:
32
- buffer = io.StringIO()
33
- df.info(buf=buffer)
34
- str_info = buffer.getvalue()
35
- try:
36
- lines = str_info.splitlines()
37
- df = (pd.DataFrame([x.split() for x in lines[5:-2]], columns=lines[3].split()).drop('Count',axis=1).rename(columns={'Non-Null':'Non-Null Count'}))
38
- return df
39
- except Exception as ex:
40
- print(ex)
41
- return str_info
42
-
43
- ### случайные числа, для отладки например
44
- def df_random_dataframe(n_cols:int = 15, n_rows:int = 100) -> pd.DataFrame:
45
- df = pd.DataFrame(np.random.randn(n_rows, n_cols), columns=(f"col {i}" for i in range(n_cols)))
46
- return df
47
-
48
- ### обработка столбца V для дальнейшего удобства + столб T типа время
49
- def df_process_v_column(df: pd.DataFrame) -> pd.DataFrame:
50
- df = df.reset_index() #
51
- df.rename(columns = {"index": "T"}, inplace=True)
52
- df["Vis"] = df.V.map(lambda v: 0 if str(v)=="nan" else 1).astype(int)
53
- df["Vfloat"] = df.V.map(lambda v: 0 if str(v)=="nan" else str(v).replace(',', '.')).astype(float)
54
- df["Vsign"] = df.Vfloat.map(lambda v: -1 if v<0 else 1 if v>0 else 0).astype(int)
55
- df["Vposneg"] = df.Vfloat.map(lambda v: "n" if v<0 else "p" if v>0 else "o").astype(str)
56
- return df
57
-
58
- ###
59
- def save_dataframe_to_hf(repo: HfRepo, dfToSave: pd.DataFrame, new_filename: str, remote_subdir: str) -> Union[huggingface_hub.CommitInfo, Exception]:
60
- """ save dataframe to hf repo """
61
- try:
62
- local_filename = os.path.join(".temp", new_filename)
63
- #df.to_csv('compressed_data.zip', index=False, compression={'method': 'zip', 'archive_name': 'data.csv'})
64
- dfToSave.to_csv(local_filename, index=False, sep=";", encoding="utf-8") # , compression="zip"
65
- apiHF = huggingface_hub.HfApi(token=repo.token)
66
- path_in_repo = os.path.basename(local_filename)
67
- if remote_subdir:
68
- path_in_repo = f"{remote_subdir}/{path_in_repo}"
69
- commit_info = apiHF.upload_file(path_or_fileobj=local_filename, path_in_repo=path_in_repo, repo_id=repo.repo_id, repo_type=repo.repo_type)
70
- return commit_info
71
- except Exception as exSave:
72
- return exSave
73
-
74
-
75
- ###
76
- def load_dataframes_from_hf(repo: HfRepo, lstCsvFiles: list[str] = []) -> {str, pd.DataFrame}:
77
- """ load dataframes from hf """
78
- #https://huggingface.co/datasets/f64k/gaziev/blob/main/TestData3_2204_noAB_gaziev.zip
79
- dict_res = {}
80
- for fl_name in lstCsvFiles:
81
- try: file_loaded = huggingface_hub.hf_hub_download(filename=fl_name, repo_id=repo.repo_id, repo_type=repo.repo_type, token=repo.token)
82
- except: file_loaded = ""
83
- if os.path.exists(file_loaded):
84
- compress = "zip" if file_loaded.lower().endswith("zip") else None
85
- df_loaded = pd.read_csv(file_loaded, sep=";", encoding = "utf-8", compression=compress)
86
- dict_res[fl_name] = df_loaded # df_Vproc = df_process_v_column(df_loaded)
87
- return dict_res
88
-
89
- ### список CSV и ZIP файлов (c уровнем вложенности) в репозитории
90
- ### https://huggingface.co/docs/huggingface_hub/en/guides/hf_file_system
91
- def list_files_hf(repo: HfRepo) -> list[str]:
92
- """ List CSV and ZIP files in HF repo """
93
- fs = huggingface_hub.HfFileSystem(token=repo.token, use_listings_cache=False) # , skip_instance_cache=True
94
- path_hf = f"{repo.repo_type}s/{repo.repo_id}/"
95
- #lst = fs.ls(path_hf, detail=False)
96
- lstGlob = fs.glob(path_hf + "**") # map(os.path.basename, lstGlob)
97
- lstNames = [fname.replace(path_hf, "") for fname in lstGlob if fname.lower().endswith(".csv") or fname.lower().endswith(".zip")]
98
- #print(f"ПРОЧИТАНО В list_files_hf() : {lstNames=}")
99
- return lstNames
100
-
101
- ###
102
- def plotly_xyzv_scatter_gray(df3D):
103
- """ 3D plot """
104
- color_discrete_map = dict(o='rgb(230,230,230)', p='rgb(90,1,1)', n='rgb(1,1,90)')
105
- fig = px.scatter_3d(df3D, x='X', y='Y', z='Z', color="Vposneg", opacity=0.4, height=800, color_discrete_map=color_discrete_map)
106
- fig.update_scenes(
107
- xaxis={"gridcolor":"rgba(30, 0, 0, 0.2)","color":"rgb(100, 0, 0)","showbackground":False},
108
- yaxis={"gridcolor":"rgba(0, 30, 0, 0.2)","color":"rgb(0, 100, 0)","showbackground":False},
109
- zaxis={"gridcolor":"rgba(0, 0, 30, 0.2)","color":"rgb(0, 0, 100)","showbackground":False})
110
- fig.update_traces(marker_size=3)
111
- return fig
112
-
113
-
114
- # lstRepoZipFiles = ["TrainData_1504_AB_gaziev.zip","TestData_1504_AB_gaziev.zip","TestData3_2204_noAB_gaziev.zip"]
115
- ### returns (classifier_object, df_train_with_predict, time_elapsed)
116
- def GetClassifier(lstDfOriginal, nHystorySteps) :
117
- #lstDfOriginal = [df_9125_Train, df_12010_Test, df_9051_Test3]
118
- nShift = nHystorySteps
119
- nCurrShift = nHystorySteps
120
- classifierName = "DecisionTreeClassifier"
121
- colsVectorInp = ["X","Y","Z"]
122
- fieldY = "Vis" #
123
- lstDataFrames = MakeHystoryColumns(lstDfOriginal, nShift)
124
- df_train = pd.concat(lstDataFrames)
125
- lstColsShift = [f"{c}-{i}" for i in range(1, nCurrShift+1) for c in colsVectorInp] # для nCurrShift=0 lstColsShift=[]
126
- colsVectorInpAll = colsVectorInp + lstColsShift
127
- y_train = df_train[fieldY]
128
- x_train_vect = df_train[colsVectorInpAll]
129
- dictClassifiers = createDictClassifiers_BestForXYZ()
130
- classifierObject = dictClassifiers[classifierName]
131
- start2 = time.time()
132
- classifierObject.fit(x_train_vect, y_train) # процесс обучения
133
- time_elapsed = time.time() - start2
134
- y_pred = classifierObject.predict(x_train_vect.values) # .values[:,::-1] поля XYZ и истории в обратном порядке
135
- df_train[f"predict_{fieldY}"] = y_pred
136
- return (classifierObject, df_train, time_elapsed)
137
-
138
- #
139
- def MakeHystoryColumns(lstDfOriginal, nShift) :
140
- lstDataframesShifted = [df.copy() for df in lstDfOriginal]
141
- lstColsShift = []
142
- for i in range(1, nShift+1):
143
- #cols = ["X","Y","Z"]+["A","B"]
144
- cols = ["X","Y","Z"]
145
- #cols = ["A","B"]
146
- for c in cols:
147
- for dfShift in lstDataframesShifted:
148
- dfShift[f'{c}-{i}'] = dfShift[c].shift(i).fillna(0)
149
- lstColsShift.append(lstDataframesShifted[0].columns[-1])
150
- print(lstColsShift)
151
- return lstDataframesShifted
152
-
153
- RANDOM_STATE=11
154
-
155
- def createDictClassifiers_BestForXYZ() :
156
- dictFastTree = {
157
- #"RandomForestClassifier": ensemble.RandomForestClassifier(random_state=RANDOM_STATE), # совсем плохие показатели
158
- #"ExtraTreeClassifier": tree.ExtraTreeClassifier(random_state=RANDOM_STATE), #
159
- "DecisionTreeClassifier": tree.DecisionTreeClassifier(random_state=RANDOM_STATE), # лучший по последним баллам
160
- }
161
- #return {**dictFast}
162
- #return {**dict_Test_MLPClassifier}
163
- #return {**dictFast, **dictLongTrain}
164
- return {**dictFastTree}
165
-
166
-
167
-
168
-
169
- #import joblib
170
- #REPO_ID = "YOUR_REPO_ID"
171
- #FILENAME = "sklearn_model.joblib"
172
- #model = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=FILENAME))
173
-
174
-
175
-
176
-
177
- if False:
178
- if False:
179
- # https://scikit-learn.org/stable/auto_examples/preprocessing/plot_all_scaling.html
180
- scaler = sklearn.preprocessing.StandardScaler()
181
- #scaler = sklearn.preprocessing.PowerTransformer()
182
- #scaler = sklearn.preprocessing.RobustScaler()
183
- #scaler = sklearn.preprocessing.MinMaxScaler() # https://scikit-learn.org/1.1/modules/generated/sklearn.preprocessing.MinMaxScaler.html#sklearn.preprocessing.MinMaxScaler
184
- #scaler = sklearn.preprocessing.MinMaxScaler(feature_range=(-1,1))
185
- #scaler = sklearn.preprocessing.QuantileTransformer()
186
- #scaler = sklearn.preprocessing.QuantileTransformer(output_distribution="normal")
187
- #scaler = sklearn.preprocessing.Normalizer() # всё на сферу кладёт - приводит к 1 длину вектора
188
- scale_columns = ["X","Y","Z"]
189
- scaledData = scaler.fit_transform(df3D[scale_columns])
190
- if False:
191
- scaler2 = sklearn.preprocessing.Normalizer()
192
- scaledData = scaler2.fit_transform(scaledData)
193
- df3D_Scaled = pd.DataFrame(data=scaledData, columns=scale_columns)
194
- df3D_Scaled["Vposneg"] = df3D["Vposneg"]
195
- df3D = df3D_Scaled
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
my_static_tools.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Union, NamedTuple
2
+ import io,os,re,sys,math,time,uuid,ctypes,pickle,random,shutil,string,urllib,decimal,datetime,itertools,traceback,collections,statistics
3
+ import numpy as np, pandas as pd
4
+ import plotly.express as px
5
+ import huggingface_hub
6
+
7
+ import sklearn #, statsmodels
8
+ from sklearn import svm, neighbors, naive_bayes, neural_network, tree, ensemble, linear_model, discriminant_analysis, gaussian_process, manifold, cluster
9
+ #from statsmodels.tsa import seasonal
10
+
11
os.makedirs(".temp", exist_ok=True)  # scratch directory for temporary local files

# CSS snippet that the pages pass to st.html(): removes the decoration and the
# popup menu button at the top, and adjusts padding / sidebar-control colour.
# Every fragment is a separate list item so that " ".join() separates them all
# (a missing comma used to glue the last rule to the closing tag).
STYLE_CORRECTION = " ".join([
    "<style>",
    "header[data-testid='stHeader'] { display:none }",
    "div[data-testid='stSidebarHeader'] { display:none }",
    "div[data-testid='stAppViewBlockContainer'] { padding:1em }",
    "div[data-testid='collapsedControl'] { background-color:#EEE }",
    "a[href='https://streamlit.io/cloud'] { display:none }",
    "</style>",
])
23
+
24
+ ###
25
def pandas_info(df: pd.DataFrame) -> Union[pd.DataFrame, str]:
    """Summarise the columns of *df* as a table, similar to ``DataFrame.info()``.

    The table is built directly from ``df.columns``, ``df.count()`` and
    ``df.dtypes`` rather than by splitting the printed ``info()`` report on
    whitespace: that split produced the wrong number of fields as soon as a
    column name contained a space (e.g. ``"col 0"``).

    Args:
        df: the frame to describe.

    Returns:
        A DataFrame with the columns ``#``, ``Column``, ``Non-Null Count`` and
        ``Dtype`` (all cells are strings, one row per column of *df*).  If the
        table cannot be built, the plain ``df.info()`` text is returned.
    """
    try:
        names = [str(name) for name in df.columns]
        return pd.DataFrame({
            "#": [str(pos) for pos in range(len(names))],
            "Column": names,
            "Non-Null Count": [str(cnt) for cnt in df.count().tolist()],
            "Dtype": [str(dtype) for dtype in df.dtypes.tolist()],
        })
    except (TypeError, ValueError) as ex:
        # Best-effort fallback kept from the original contract: report the
        # problem and hand back the raw text report instead of failing.
        print(ex)
        buffer = io.StringIO()
        df.info(buf=buffer)
        return buffer.getvalue()
36
+
37
+
38
def pandas_random_dataframe(n_cols: int = 15, n_rows: int = 100, seed: Union[int, None] = None) -> pd.DataFrame:
    """Create a DataFrame of standard-normal random numbers (e.g. for debugging).

    Args:
        n_cols: number of columns; they are named ``"col 0"``, ``"col 1"``, ...
        n_rows: number of rows.
        seed: optional seed.  When given, a private ``RandomState`` seeded with
            it is used, so the result is reproducible.  When ``None`` (the
            default) ``np.random.randn`` is called exactly as before.

    Returns:
        A DataFrame of shape ``(n_rows, n_cols)``.
    """
    randn = np.random.randn if seed is None else np.random.RandomState(seed).randn
    return pd.DataFrame(randn(n_rows, n_cols), columns=[f"col {i}" for i in range(n_cols)])
42
+
43
+
44
+ ########################################################################################################################################
45
+
46
+
47
class HfRepo(NamedTuple):
    """Identity and credentials of one Hugging Face Hub repository."""

    repo_id: str    # e.g. "f64k/gaziev"
    repo_type: str  # singular form, e.g. "dataset"
    token: str      # access token used for the Hub API calls

    def __repr__(self) -> str:
        # The default NamedTuple repr would print the access token verbatim;
        # mask it so that displaying or logging the object does not leak it.
        shown = "***" if self.token else self.token
        return f"{type(self).__name__}(repo_id={self.repo_id!r}, repo_type={self.repo_type!r}, token={shown!r})"
51
+
52
class HF_tools:
    """Hugging Face Hub helpers.

    The class is only a namespace: every method is static and is called on the
    class itself (e.g. ``HF_tools.list_files_hf(repo)``).
    """

    @staticmethod
    def list_models_spaces(token: str, author='f64k'):
        """List the datasets and models that belong to *author*.

        Despite the method name, spaces are currently not queried.

        Returns:
            A list of dicts with the keys "id", "type" (class name of the Hub
            info object), "private" and "tags"; datasets first, then models.
        """
        api = huggingface_hub.HfApi(token=token)
        found = list(api.list_datasets(author=author)) + list(api.list_models(author=author))
        return [{"id": i.id, "type": type(i).__name__, "private": i.private, "tags": i.tags} for i in found]

    @staticmethod
    def save_dataframe_to_hf(repo: "HfRepo", dfToSave: pd.DataFrame, new_filename: str, remote_subdir: str) -> "Union[huggingface_hub.CommitInfo, Exception]":
        """Write *dfToSave* as a ';'-separated UTF-8 CSV and upload it to the repo.

        Args:
            repo: target repository and token.
            dfToSave: frame to store (the index is not written).
            new_filename: file name; the file is first written into ".temp".
            remote_subdir: folder inside the repo; falsy means the repo root.

        Returns:
            The value returned by ``upload_file`` on success.  On failure the
            exception is RETURNED, not raised - callers display the result.
        """
        try:
            local_filename = os.path.join(".temp", new_filename)
            dfToSave.to_csv(local_filename, index=False, sep=";", encoding="utf-8")
            path_in_repo = os.path.basename(local_filename)
            if remote_subdir:
                path_in_repo = f"{remote_subdir}/{path_in_repo}"
            api = huggingface_hub.HfApi(token=repo.token)
            return api.upload_file(path_or_fileobj=local_filename, path_in_repo=path_in_repo, repo_id=repo.repo_id, repo_type=repo.repo_type)
        except Exception as exSave:
            return exSave

    @staticmethod
    def load_dataframes_from_hf(repo: "HfRepo", lstCsvFiles: Union[list[str], None] = None) -> dict[str, pd.DataFrame]:
        """Download the given files from the repo and read each one as a DataFrame.

        Files are read as ';'-separated UTF-8 CSV; names ending in ".zip" are
        read as zip-compressed CSV.

        Args:
            repo: source repository and token.
            lstCsvFiles: file paths inside the repo; ``None`` or empty loads nothing.

        Returns:
            ``{file name: DataFrame}`` for every file that could be downloaded.
            Files whose download fails are skipped silently (best effort).
        """
        loaded = {}
        for fl_name in lstCsvFiles or []:
            try:
                local_path = huggingface_hub.hf_hub_download(filename=fl_name, repo_id=repo.repo_id, repo_type=repo.repo_type, token=repo.token)
            except Exception:
                continue  # deliberate best effort: an unavailable file is simply left out
            if not os.path.exists(local_path):
                continue
            compress = "zip" if local_path.lower().endswith(".zip") else None
            loaded[fl_name] = pd.read_csv(local_path, sep=";", encoding="utf-8", compression=compress)
        return loaded

    @staticmethod
    def list_files_hf(repo: "HfRepo") -> list[str]:
        """List the CSV and ZIP files in the repo, including those in sub-folders.

        See https://huggingface.co/docs/huggingface_hub/en/guides/hf_file_system

        Returns:
            Paths relative to the repo root, e.g. "ID_XYZ/name.csv".
        """
        # The listings cache is disabled so that files uploaded a moment ago show up.
        fs = huggingface_hub.HfFileSystem(token=repo.token, use_listings_cache=False)
        path_hf = f"{repo.repo_type}s/{repo.repo_id}/"
        return [
            fname.removeprefix(path_hf)  # strip only the leading repo path, not later occurrences
            for fname in fs.glob(path_hf + "**")
            if fname.lower().endswith((".csv", ".zip"))
        ]
103
+
104
+
105
+ ########################################################################################################################################
106
+
107
+
108
RANDOM_STATE = 11  # seed passed to the classifiers created below


class XYZV_tools:
    """Helpers for data in the special X / Y / Z / V column format.

    The class is only a namespace: every method is static.
    """

    @staticmethod
    def df_process_v_column(df: pd.DataFrame) -> pd.DataFrame:
        """Return a new frame with helper columns derived from ``V`` plus a ``T`` column.

        The input frame is not modified.  Added columns:
            T       - the former index (result of ``reset_index``)
            Vis     - 1 where V holds a value, 0 where it is NaN
            Vfloat  - V as float (a decimal comma is accepted), 0 where V is NaN
            Vsign   - -1, 0 or 1 according to the sign of Vfloat
            Vposneg - "n", "o" or "p" according to the sign of Vfloat
        """
        df = df.reset_index().rename(columns={"index": "T"})
        df["Vis"] = df["V"].map(lambda v: 0 if str(v) == "nan" else 1).astype(int)
        df["Vfloat"] = df["V"].map(lambda v: 0 if str(v) == "nan" else str(v).replace(',', '.')).astype(float)
        df["Vsign"] = df["Vfloat"].map(lambda v: -1 if v < 0 else 1 if v > 0 else 0).astype(int)
        df["Vposneg"] = df["Vfloat"].map(lambda v: "n" if v < 0 else "p" if v > 0 else "o").astype(str)
        return df

    @staticmethod
    def CreateDictClassifiers_BestForXYZ():
        """Return a fresh ``{name: unfitted classifier}`` dict.

        Only the decision tree is kept; per the earlier notes in this file
        RandomForestClassifier gave much worse results.
        """
        return {
            "DecisionTreeClassifier": tree.DecisionTreeClassifier(random_state=RANDOM_STATE),  # best by the latest scores
        }

    @staticmethod
    def GetClassifier(lstDfOriginal, nHystorySteps):
        """Train the decision-tree classifier on the concatenation of the given frames.

        Args:
            lstDfOriginal: frames that contain the columns X, Y, Z and Vis
                (Vis is what ``df_process_v_column`` adds).
            nHystorySteps: how many previous rows' X/Y/Z values are appended as
                extra feature columns; 0 means X, Y, Z only.

        Returns:
            ``(classifier_object, df_train_with_predict, time_elapsed)`` where
            the frame has an extra ``predict_Vis`` column and ``time_elapsed``
            is the duration of ``fit`` in seconds.
        """
        classifierName = "DecisionTreeClassifier"
        colsVectorInp = ["X", "Y", "Z"]
        fieldY = "Vis"
        df_train = pd.concat(XYZV_tools.MakeHystoryColumns(lstDfOriginal, nHystorySteps))
        lstColsShift = [f"{c}-{i}" for i in range(1, nHystorySteps + 1) for c in colsVectorInp]
        x_train_vect = df_train[colsVectorInp + lstColsShift]
        y_train = df_train[fieldY]
        classifierObject = XYZV_tools.CreateDictClassifiers_BestForXYZ()[classifierName]
        started = time.perf_counter()
        classifierObject.fit(x_train_vect, y_train)  # the training itself
        time_elapsed = time.perf_counter() - started
        # Predict on the same DataFrame that was used for fit(): passing the bare
        # ndarray (.values) makes scikit-learn warn about missing feature names.
        df_train[f"predict_{fieldY}"] = classifierObject.predict(x_train_vect)
        return (classifierObject, df_train, time_elapsed)

    @staticmethod
    def MakeHystoryColumns(lstDfOriginal, nShift, cols=("X", "Y", "Z")):
        """Return copies of the frames with lagged ("history") columns appended.

        For every lag ``i`` in ``1..nShift`` and every column ``c`` in *cols* a
        column named ``"{c}-{i}"`` is added that holds ``c`` shifted down by
        ``i`` rows, with the first ``i`` rows filled with 0.

        Args:
            lstDfOriginal: list of frames; may be empty. The originals are not modified.
            nShift: number of lags; 0 adds nothing.
            cols: columns to lag; defaults to X, Y, Z.
        """
        shifted = [df.copy() for df in lstDfOriginal]
        for i in range(1, nShift + 1):
            for c in cols:
                for dfShift in shifted:
                    dfShift[f'{c}-{i}'] = dfShift[c].shift(i).fillna(0)
        return shifted

    @staticmethod
    def plotly_xyzv_scatter_gray(df3D):
        """Build a 3D scatter figure of X/Y/Z coloured by the ``Vposneg`` column.

        "o" points are light gray, "p" points dark red, "n" points dark blue.
        """
        color_discrete_map = dict(o='rgb(230,230,230)', p='rgb(90,1,1)', n='rgb(1,1,90)')
        fig = px.scatter_3d(df3D, x='X', y='Y', z='Z', color="Vposneg", opacity=0.4, height=800, color_discrete_map=color_discrete_map)
        fig.update_scenes(
            xaxis={"gridcolor": "rgba(30, 0, 0, 0.2)", "color": "rgb(100, 0, 0)", "showbackground": False},
            yaxis={"gridcolor": "rgba(0, 30, 0, 0.2)", "color": "rgb(0, 100, 0)", "showbackground": False},
            zaxis={"gridcolor": "rgba(0, 0, 30, 0.2)", "color": "rgb(0, 0, 100)", "showbackground": False})
        fig.update_traces(marker_size=3)
        return fig
185
+
186
+
187
+
188
+ ########################################################################################################################################
189
+
190
+
191
+
192
+
193
+
194
+
195
+
196
+
197
+ #import joblib
198
+ #REPO_ID = "YOUR_REPO_ID"
199
+ #FILENAME = "sklearn_model.joblib"
200
+ #model = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=FILENAME))
201
+
202
+
203
# Disabled scratch code (never executed because of `if False:`): experiments with
# scaling the X/Y/Z columns before plotting. It refers to a `df3D` variable that
# is not defined at module level, so it cannot simply be switched on here.
if False:
    if False:
        # https://scikit-learn.org/stable/auto_examples/preprocessing/plot_all_scaling.html
        scaler = sklearn.preprocessing.StandardScaler()
        #scaler = sklearn.preprocessing.PowerTransformer()
        #scaler = sklearn.preprocessing.RobustScaler()
        #scaler = sklearn.preprocessing.MinMaxScaler() # https://scikit-learn.org/1.1/modules/generated/sklearn.preprocessing.MinMaxScaler.html#sklearn.preprocessing.MinMaxScaler
        #scaler = sklearn.preprocessing.MinMaxScaler(feature_range=(-1,1))
        #scaler = sklearn.preprocessing.QuantileTransformer()
        #scaler = sklearn.preprocessing.QuantileTransformer(output_distribution="normal")
        #scaler = sklearn.preprocessing.Normalizer() # puts everything onto a sphere - scales each vector to length 1
        scale_columns = ["X","Y","Z"]
        scaledData = scaler.fit_transform(df3D[scale_columns])
        if False:
            # optional second pass: normalise each already-scaled row
            scaler2 = sklearn.preprocessing.Normalizer()
            scaledData = scaler2.fit_transform(scaledData)
        df3D_Scaled = pd.DataFrame(data=scaledData, columns=scale_columns)
        df3D_Scaled["Vposneg"] = df3D["Vposneg"]
        df3D = df3D_Scaled
222
+
223
+
224
+
pages/2_TECT_IDXYZ.py CHANGED
@@ -1,9 +1,12 @@
1
  import os, re, sys, time, math, shutil, urllib, string, random, pickle, zipfile, datetime, platform
2
  import streamlit as st, pandas as pd, numpy as np
3
- import my_static_methods as my_stm
 
 
 
4
 
5
  if True:
6
- st.html(my_stm.STYLE_CORRECTION)
7
  dirParams = {
8
  "os.getcwd": os.getcwd(),
9
  "cpu_count": os.cpu_count(),
@@ -23,20 +26,20 @@ if True:
23
  #st.sidebar.write(dirParams)
24
 
25
 
26
- REPO = my_stm.HfRepo("f64k/gaziev", "dataset", st.secrets["HF_WRITE"])
27
- lstRepoFiles = my_stm.list_files_hf(REPO) # список уже имеющихся в репозитории файлов
28
  dictTestFilesIdXyz = {f.upper().replace("ID_XYZ/",""): f.upper() for f in lstRepoFiles if f.upper().startswith("ID_XYZ/")}
29
 
30
  @st.cache_data
31
  def GetListOf_XYZV_ToTrainClassifier(repo):
32
  lstRepoZipFiles = ["TrainData_1504_AB_gaziev.zip","TestData_1504_AB_gaziev.zip","TestData3_2204_noAB_gaziev.zip"]
33
- dictTrainThreeDataframes = my_stm.load_dataframes_from_hf(repo, lstRepoZipFiles)
34
- lstDfOriginal = [my_stm.df_process_v_column(df) for df in dictTrainThreeDataframes.values()]
35
  return lstDfOriginal
36
 
37
  @st.cache_data
38
  def GetCachedClassifier(lstDfOriginal, nHystorySteps):
39
- classifier_object, df_train_with_predict, time_elapsed = my_stm.GetClassifier(lstDfOriginal, nHystorySteps)
40
  #st.session_state.df_train_with_predict = df_train_with_predict
41
  columns_xyzv = [c for c in df_train_with_predict.columns if "Vis" in c] + [c for c in df_train_with_predict.columns if c[0] in "XYZ"]
42
  st.session_state.df_train_with_predict = df_train_with_predict[columns_xyzv]
@@ -67,7 +70,7 @@ def DescriptionMarkdown() -> str:
67
  """
68
 
69
  def save_dataframe_nodialog_idxyz(new_filename, dfToSave):
70
- commit_info = my_stm.save_dataframe_to_hf(REPO, dfToSave, new_filename, "ID_XYZ")
71
  st.toast(commit_info, icon='🆕')
72
  ReRun()
73
 
@@ -124,13 +127,13 @@ with col1.popover("🆕 добавить новый файл", use_container_wid
124
  st.error(f"Столбцы не ID;X;Y;Z ! Наблюдаем столбцы : {colnames}")
125
 
126
  # список уже имеющихся в репозитории файлов. повторное чтение
127
- lstRepoFiles = my_stm.list_files_hf(REPO)
128
  dictTestFilesIdXyz = {f.upper().replace("ID_XYZ/",""): f.upper() for f in lstRepoFiles if f.upper().startswith("ID_XYZ/")}
129
  selectedFile = col1.radio("📰 загруженные тестовые пакеты", dictTestFilesIdXyz.keys(), index=None)
130
 
131
  # выбран файл для предсказания
132
  if selectedFile is not None:
133
- dict_ONE_IDXYZ = my_stm.load_dataframes_from_hf(REPO, [dictTestFilesIdXyz[selectedFile]])
134
  if len(dict_ONE_IDXYZ) > 0:
135
  df_idxyz = list(dict_ONE_IDXYZ.values())[0]
136
  dfShow = df_idxyz
 
1
  import os, re, sys, time, math, shutil, urllib, string, random, pickle, zipfile, datetime, platform
2
  import streamlit as st, pandas as pd, numpy as np
3
+ import my_static_tools as mst
4
+ from my_static_tools import HF_tools as hft
5
+ from my_static_tools import XYZV_tools as xvt
6
+
7
 
8
  if True:
9
+ st.html(mst.STYLE_CORRECTION)
10
  dirParams = {
11
  "os.getcwd": os.getcwd(),
12
  "cpu_count": os.cpu_count(),
 
26
  #st.sidebar.write(dirParams)
27
 
28
 
29
+ REPO = mst.HfRepo("f64k/gaziev", "dataset", st.secrets["HF_WRITE"])
30
+ lstRepoFiles = hft.list_files_hf(REPO) # список уже имеющихся в репозитории файлов
31
  dictTestFilesIdXyz = {f.upper().replace("ID_XYZ/",""): f.upper() for f in lstRepoFiles if f.upper().startswith("ID_XYZ/")}
32
 
33
  @st.cache_data
34
  def GetListOf_XYZV_ToTrainClassifier(repo):
35
  lstRepoZipFiles = ["TrainData_1504_AB_gaziev.zip","TestData_1504_AB_gaziev.zip","TestData3_2204_noAB_gaziev.zip"]
36
+ dictTrainThreeDataframes = hft.load_dataframes_from_hf(repo, lstRepoZipFiles)
37
+ lstDfOriginal = [xvt.df_process_v_column(df) for df in dictTrainThreeDataframes.values()]
38
  return lstDfOriginal
39
 
40
  @st.cache_data
41
  def GetCachedClassifier(lstDfOriginal, nHystorySteps):
42
+ classifier_object, df_train_with_predict, time_elapsed = xvt.GetClassifier(lstDfOriginal, nHystorySteps)
43
  #st.session_state.df_train_with_predict = df_train_with_predict
44
  columns_xyzv = [c for c in df_train_with_predict.columns if "Vis" in c] + [c for c in df_train_with_predict.columns if c[0] in "XYZ"]
45
  st.session_state.df_train_with_predict = df_train_with_predict[columns_xyzv]
 
70
  """
71
 
72
  def save_dataframe_nodialog_idxyz(new_filename, dfToSave):
73
+ commit_info = hft.save_dataframe_to_hf(REPO, dfToSave, new_filename, "ID_XYZ")
74
  st.toast(commit_info, icon='🆕')
75
  ReRun()
76
 
 
127
  st.error(f"Столбцы не ID;X;Y;Z ! Наблюдаем столбцы : {colnames}")
128
 
129
  # список уже имеющихся в репозитории файлов. повторное чтение
130
+ lstRepoFiles = hft.list_files_hf(REPO)
131
  dictTestFilesIdXyz = {f.upper().replace("ID_XYZ/",""): f.upper() for f in lstRepoFiles if f.upper().startswith("ID_XYZ/")}
132
  selectedFile = col1.radio("📰 загруженные тестовые пакеты", dictTestFilesIdXyz.keys(), index=None)
133
 
134
  # выбран файл для предсказания
135
  if selectedFile is not None:
136
+ dict_ONE_IDXYZ = hft.load_dataframes_from_hf(REPO, [dictTestFilesIdXyz[selectedFile]])
137
  if len(dict_ONE_IDXYZ) > 0:
138
  df_idxyz = list(dict_ONE_IDXYZ.values())[0]
139
  dfShow = df_idxyz
pages/4_Загрузка Просмотр CSV.py CHANGED
@@ -1,9 +1,13 @@
1
  import os, csv, json
2
  import streamlit as st, pandas as pd, numpy as np
3
  import huggingface_hub
4
- import my_static_methods as my_stm
5
- st.html(my_stm.STYLE_CORRECTION)
6
- REPO = my_stm.HfRepo("f64k/gaziev", "dataset", st.secrets["HF_WRITE"])
 
 
 
 
7
 
8
  st.sidebar.markdown("# 💾 Загрузка")
9
  st.sidebar.markdown("# 🧊 Просмотр")
@@ -21,7 +25,7 @@ def save_dataframe_dialog_start(new_filename, dfToSave):
21
  cnt_msg = st.container()
22
  d_cols = st.columns(2)
23
  if d_cols[1].button("Сохранить"):
24
- commit_info = my_stm.save_dataframe_to_hf(REPO, dfToSave, new_filename, "XYZ_CSV")
25
  cnt_msg.write(commit_info)
26
  #st_rerun()
27
  if d_cols[0].button("Закрыть"):
@@ -68,23 +72,23 @@ with tab1:
68
  if col1.button(f"можно сохранить на сервер файл '{fileXYZ}'"): # .to_excel(fileXlsName)
69
  save_dataframe_dialog_start(fileXYZ, df)
70
  col2.write(df.describe())
71
- dfinfo = my_stm.pandas_info(df)
72
  col2.write(dfinfo)
73
 
74
  #col1.write(df.aggregate(["mean","median","prod","sum","std","var"]))
75
 
76
- lstTestFiles = [f for f in my_stm.list_files_hf(REPO) if not f.upper().startswith("ID_XYZ/")]
77
 
78
  with tab2:
79
  cols = tab2.columns(2)
80
  #key_xyz = st.selectbox("Выберите файл данных для просмотра таблицы и точек", dictXYZV.keys())
81
  one_file_selected = cols[0].selectbox("Выберите файл данных для просмотра таблицы и точек", lstTestFiles)
82
  if one_file_selected:
83
- dict_ONE_XYZV = my_stm.load_dataframes_from_hf(REPO, [one_file_selected])
84
  if len(dict_ONE_XYZV) > 0:
85
  df_xyz = list(dict_ONE_XYZV.values())[0] #df_xyz = dictXYZV[key_xyz]
86
- df_xyz_vproc = my_stm.df_process_v_column(df_xyz)
87
- fig = my_stm.plotly_xyzv_scatter_gray(df_xyz_vproc)
88
  cols[0].plotly_chart(fig) # st.plotly_chart(fig) #fig.show()
89
  lstValues = sorted(set(df_xyz.notna().sum()), reverse=True)
90
  cols[1].info(f"Bceгo записей : {lstValues[0]}; Значений V : {lstValues[1]}")
 
1
  import os, csv, json
2
  import streamlit as st, pandas as pd, numpy as np
3
  import huggingface_hub
4
+
5
+ import my_static_tools as mst
6
+ from my_static_tools import HF_tools as hft
7
+ from my_static_tools import XYZV_tools as xvt
8
+
9
+ st.html(mst.STYLE_CORRECTION)
10
+ REPO = mst.HfRepo("f64k/gaziev", "dataset", st.secrets["HF_WRITE"])
11
 
12
  st.sidebar.markdown("# 💾 Загрузка")
13
  st.sidebar.markdown("# 🧊 Просмотр")
 
25
  cnt_msg = st.container()
26
  d_cols = st.columns(2)
27
  if d_cols[1].button("Сохранить"):
28
+ commit_info = hft.save_dataframe_to_hf(REPO, dfToSave, new_filename, "XYZ_CSV")
29
  cnt_msg.write(commit_info)
30
  #st_rerun()
31
  if d_cols[0].button("Закрыть"):
 
72
  if col1.button(f"можно сохранить на сервер файл '{fileXYZ}'"): # .to_excel(fileXlsName)
73
  save_dataframe_dialog_start(fileXYZ, df)
74
  col2.write(df.describe())
75
+ dfinfo = mst.pandas_info(df)
76
  col2.write(dfinfo)
77
 
78
  #col1.write(df.aggregate(["mean","median","prod","sum","std","var"]))
79
 
80
+ lstTestFiles = [f for f in hft.list_files_hf(REPO) if not f.upper().startswith("ID_XYZ/")]
81
 
82
  with tab2:
83
  cols = tab2.columns(2)
84
  #key_xyz = st.selectbox("Выберите файл данных для просмотра таблицы и точек", dictXYZV.keys())
85
  one_file_selected = cols[0].selectbox("Выберите файл данных для просмотра таблицы и точек", lstTestFiles)
86
  if one_file_selected:
87
+ dict_ONE_XYZV = hft.load_dataframes_from_hf(REPO, [one_file_selected])
88
  if len(dict_ONE_XYZV) > 0:
89
  df_xyz = list(dict_ONE_XYZV.values())[0] #df_xyz = dictXYZV[key_xyz]
90
+ df_xyz_vproc = xvt.df_process_v_column(df_xyz)
91
+ fig = xvt.plotly_xyzv_scatter_gray(df_xyz_vproc)
92
  cols[0].plotly_chart(fig) # st.plotly_chart(fig) #fig.show()
93
  lstValues = sorted(set(df_xyz.notna().sum()), reverse=True)
94
  cols[1].info(f"Bceгo записей : {lstValues[0]}; Значений V : {lstValues[1]}")
pages/6_Chat.py CHANGED
@@ -1,14 +1,18 @@
1
  import os, re, sys, time, math, shutil, urllib, string, random, pickle, zipfile, datetime
2
  import streamlit as st, pandas as pd, numpy as np
3
- import my_static_methods as my_stm
4
  from faker import Faker
5
 
 
 
 
 
 
6
  # https://docs.gspread.org/en/latest/user-guide.html#updating-cells
7
  # https://docs.streamlit.io/develop/tutorials/databases/private-gsheet
8
  # https://github.com/streamlit/gsheets-connection/blob/main/examples/pages/Service_Account_Example.py#L167
9
  #from streamlit_gsheets import GSheetsConnection # st-gsheets-connection - конфликт с pandas 2.2.2
10
 
11
- st.html(my_stm.STYLE_CORRECTION)
12
  st.sidebar.markdown("# Переговоры 💬")
13
 
14
  # Create a connection object.
@@ -16,8 +20,8 @@ st.sidebar.markdown("# Переговоры 💬")
16
  #st.write(conn)
17
  #st.help(conn)
18
 
19
- REPO = my_stm.HfRepo("f64k/gaziev", "dataset", st.secrets["HF_WRITE"])
20
- lstRepoFiles = my_stm.list_files_hf(REPO) # список уже имеющихся в репозитории файлов
21
  dictTestFilesIdXyz = {f.upper().replace("ID_XYZ/",""): f.upper() for f in lstRepoFiles if f.upper().startswith("ID_XYZ/")}
22
 
23
  """
 
1
  import os, re, sys, time, math, shutil, urllib, string, random, pickle, zipfile, datetime
2
  import streamlit as st, pandas as pd, numpy as np
 
3
  from faker import Faker
4
 
5
+ import my_static_tools as mst
6
+ from my_static_tools import HF_tools as hft
7
+ from my_static_tools import XYZV_tools as xvt
8
+
9
+
10
  # https://docs.gspread.org/en/latest/user-guide.html#updating-cells
11
  # https://docs.streamlit.io/develop/tutorials/databases/private-gsheet
12
  # https://github.com/streamlit/gsheets-connection/blob/main/examples/pages/Service_Account_Example.py#L167
13
  #from streamlit_gsheets import GSheetsConnection # st-gsheets-connection - конфликт с pandas 2.2.2
14
 
15
+ st.html(mst.STYLE_CORRECTION)
16
  st.sidebar.markdown("# Переговоры 💬")
17
 
18
  # Create a connection object.
 
20
  #st.write(conn)
21
  #st.help(conn)
22
 
23
+ REPO = mst.HfRepo("f64k/gaziev", "dataset", st.secrets["HF_WRITE"])
24
+ lstRepoFiles = hft.list_files_hf(REPO) # список уже имеющихся в репозитории файлов
25
  dictTestFilesIdXyz = {f.upper().replace("ID_XYZ/",""): f.upper() for f in lstRepoFiles if f.upper().startswith("ID_XYZ/")}
26
 
27
  """
pages/9_Таблица_результатов.py CHANGED
@@ -1,11 +1,11 @@
1
  import streamlit as st, pandas as pd, numpy as np
2
- import my_static_methods as my_stm
3
- st.markdown(my_stm.STYLE_CORRECTION, unsafe_allow_html=True)
4
 
5
  st.sidebar.markdown("### просто таблица случайных чисел - пока заглушка ❄️")
6
 
7
  pop = st.popover("Open popover")
8
- pop.dataframe(my_stm.df_random_dataframe())
9
 
10
  expand = st.sidebar.expander("My label", icon=":material/info:")
11
  expand.write("Inside the expander.")
 
1
  import streamlit as st, pandas as pd, numpy as np
2
+ import my_static_tools as mst
3
+ st.markdown(mst.STYLE_CORRECTION, unsafe_allow_html=True)
4
 
5
  st.sidebar.markdown("### просто таблица случайных чисел - пока заглушка ❄️")
6
 
7
  pop = st.popover("Open popover")
8
+ pop.dataframe(mst.pandas_random_dataframe())
9
 
10
  expand = st.sidebar.expander("My label", icon=":material/info:")
11
  expand.write("Inside the expander.")
pages/old/_plotly_graph_objs.py CHANGED
@@ -1,6 +1,6 @@
1
  import streamlit as st, pandas as pd, numpy as np
2
- import my_static_methods as my_stm
3
- st.html(my_stm.STYLE_CORRECTION)
4
 
5
  import plotly.figure_factory as ff
6
  import plotly.graph_objs as go
 
1
  import streamlit as st, pandas as pd, numpy as np
2
+ import my_static_tools as mst
3
+ st.html(mst.STYLE_CORRECTION)
4
 
5
  import plotly.figure_factory as ff
6
  import plotly.graph_objs as go
pages/old/_просмотр_CSV.py CHANGED
@@ -1,28 +1,28 @@
1
  import os
2
  import streamlit as st, pandas as pd, numpy as np
3
- import my_static_methods as my_stm
4
- st.html(my_stm.STYLE_CORRECTION)
5
 
6
  #st.sidebar.title("⚜️")
7
 
8
  #col1.metric("Temperature", "70 °F", "1.2 °F") #col2.metric("Wind", "9 mph", "-8%") #col3.metric("Humidity", "86%", "4%")
9
 
10
 
11
- REPO = my_stm.HfRepo("f64k/gaziev", "dataset", st.secrets["HF_WRITE"])
12
  st.sidebar.info("🧊💽 сохраненные\n таблицы CSV")
13
 
14
- lstTestFiles = [f for f in my_stm.list_files_hf(REPO) if not f.upper().startswith("ID_XYZ/")]
15
 
16
  cols = st.columns(2)
17
 
18
  #key_xyz = st.selectbox("Выберите файл данных для просмотра таблицы и точек", dictXYZV.keys())
19
  one_file_selected = cols[0].selectbox("Выберите файл данных для просмотра таблицы и точек", lstTestFiles)
20
  if one_file_selected:
21
- dict_ONE_XYZV = my_stm.load_dataframes_from_hf(REPO, [one_file_selected])
22
  if len(dict_ONE_XYZV) > 0:
23
  df_xyz = list(dict_ONE_XYZV.values())[0] #df_xyz = dictXYZV[key_xyz]
24
- df_xyz_vproc = my_stm.df_process_v_column(df_xyz)
25
- fig_plotly_xyzv_scatter = my_stm.plotly_xyzv_scatter_gray(df_xyz_vproc)
26
  cols[0].plotly_chart(fig_plotly_xyzv_scatter) #fig.show()
27
  lstValues = sorted(set(df_xyz.notna().sum()), reverse=True)
28
  cols[1].info(f"Bceгo записей : {lstValues[0]}; Значений V : {lstValues[1]}")
 
1
  import os
2
  import streamlit as st, pandas as pd, numpy as np
3
+ import my_static_tools as mst
4
+ st.html(mst.STYLE_CORRECTION)
5
 
6
  #st.sidebar.title("⚜️")
7
 
8
  #col1.metric("Temperature", "70 °F", "1.2 °F") #col2.metric("Wind", "9 mph", "-8%") #col3.metric("Humidity", "86%", "4%")
9
 
10
 
11
+ REPO = mst.HfRepo("f64k/gaziev", "dataset", st.secrets["HF_WRITE"])
12
  st.sidebar.info("🧊💽 сохраненные\n таблицы CSV")
13
 
14
+ lstTestFiles = [f for f in mst.list_files_hf(REPO) if not f.upper().startswith("ID_XYZ/")]
15
 
16
  cols = st.columns(2)
17
 
18
  #key_xyz = st.selectbox("Выберите файл данных для просмотра таблицы и точек", dictXYZV.keys())
19
  one_file_selected = cols[0].selectbox("Выберите файл данных для просмотра таблицы и точек", lstTestFiles)
20
  if one_file_selected:
21
+ dict_ONE_XYZV = mst.load_dataframes_from_hf(REPO, [one_file_selected])
22
  if len(dict_ONE_XYZV) > 0:
23
  df_xyz = list(dict_ONE_XYZV.values())[0] #df_xyz = dictXYZV[key_xyz]
24
+ df_xyz_vproc = mst.df_process_v_column(df_xyz)
25
+ fig_plotly_xyzv_scatter = mst.plotly_xyzv_scatter_gray(df_xyz_vproc)
26
  cols[0].plotly_chart(fig_plotly_xyzv_scatter) #fig.show()
27
  lstValues = sorted(set(df_xyz.notna().sum()), reverse=True)
28
  cols[1].info(f"Bceгo записей : {lstValues[0]}; Значений V : {lstValues[1]}")