3v324v23 committed on
Commit
65a6d79
·
2 Parent(s): 3a962ff f81c1a5

merge: Merge feat/analysis into dev

Browse files
Files changed (1) hide show
  1. inspect_data.py +59 -0
inspect_data.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import pickle
3
+
4
def inspect_and_generate_schemas(csv_path='01_merged_data.csv',
                                 output_path='generated_schemas_v2.txt'):
    """Inspect a merged dataset and generate Pydantic/SQLAlchemy schema text.

    Reads the CSV at *csv_path*, drops identifier/target columns, maps each
    remaining column's pandas dtype to a Python annotation (for a Pydantic
    ``InputSchema``) and to a SQLAlchemy column type, prints both generated
    snippets, and writes them to *output_path*.

    Args:
        csv_path: Path of the merged CSV to inspect. The default preserves
            the original hard-coded filename.
        output_path: Destination file for the generated schema text. The
            default preserves the original hard-coded filename.
    """
    # Load Data
    print("Loading data...")
    df = pd.read_csv(csv_path)

    # Columns excluded from the feature set: identifiers and target/label
    # columns chosen explicitly based on prior analysis.
    excluded_cols = [
        'id_employee', 'eval_number', 'code_sondage',
        'a_quitte_l_entreprise', 'turnover'
    ]

    features = [c for c in df.columns if c not in excluded_cols]
    print(f"Selected features: {features}")

    # pandas dtype name -> Python annotation for the Pydantic model.
    type_mapping = {
        'int64': 'int',
        'float64': 'float',
        'object': 'str',
        'bool': 'bool'
    }

    # pandas dtype name -> SQLAlchemy column type (reference output only).
    sql_type_mapping = {
        'int64': 'Integer',
        'float64': 'Float',
        'object': 'String',
        'bool': 'Boolean'
    }

    pydantic_fields = []
    sql_columns = []
    for feature in features:
        dtype = str(df[feature].dtype)
        # Unknown dtypes fall back to the most permissive textual types.
        py_type = type_mapping.get(dtype, 'str')
        sql_type = sql_type_mapping.get(dtype, 'String')

        pydantic_fields.append(f"    {feature}: {py_type}")
        sql_columns.append(f"    {feature} = Column({sql_type})")

    pydantic_schema = "class InputSchema(BaseModel):\n" + "\n".join(pydantic_fields)

    print("\n--- Generated Pydantic Schema ---")
    print(pydantic_schema)

    print("\n--- Generated SQL Columns (for reference) ---")
    print("\n".join(sql_columns))

    # Persist both snippets so they can be copy/pasted into the app code.
    with open(output_path, 'w') as f:
        f.write(pydantic_schema)
        f.write("\n\n")
        f.write("\n".join(sql_columns))
57
+
58
# Script entry point: run the schema inspection only when executed directly,
# not when this module is imported elsewhere.
if __name__ == "__main__":
    inspect_and_generate_schemas()