Spaces:
Sleeping
Sleeping
File size: 1,626 Bytes
f81c1a5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
import pandas as pd
import pickle
def inspect_and_generate_schemas(input_path='01_merged_data.csv',
                                 output_path='generated_schemas_v2.txt'):
    """Generate Pydantic and SQLAlchemy schema snippets from a CSV dataset.

    Loads the CSV at *input_path*, drops identifier/target columns, maps each
    remaining column's pandas dtype to a Python annotation (for a Pydantic
    ``BaseModel``) and an SQLAlchemy column type, prints both generated
    snippets, and writes them to *output_path*.

    Args:
        input_path: CSV file to inspect. Default preserves the original
            hard-coded behavior.
        output_path: Destination text file for the generated schema text.

    Returns:
        tuple[str, str]: ``(pydantic_schema, sql_columns_text)`` — the same
        text that is printed and written to *output_path*.
    """
    print("Loading data...")
    df = pd.read_csv(input_path)

    # Identifier and target/label columns are excluded: they are not
    # model input features.
    excluded_cols = [
        'id_employee', 'eval_number', 'code_sondage',
        'a_quitte_l_entreprise', 'turnover'
    ]
    features = [c for c in df.columns if c not in excluded_cols]
    print(f"Selected features: {features}")

    # pandas dtype -> Python annotation; unknown dtypes fall back to 'str'.
    type_mapping = {
        'int64': 'int',
        'float64': 'float',
        'object': 'str',
        'bool': 'bool',
    }
    # pandas dtype -> SQLAlchemy column type; same fallback policy ('String').
    sql_type_mapping = {
        'int64': 'Integer',
        'float64': 'Float',
        'object': 'String',
        'bool': 'Boolean',
    }

    pydantic_fields = []
    sql_columns = []
    for feature in features:
        dtype = str(df[feature].dtype)
        py_type = type_mapping.get(dtype, 'str')
        sql_type = sql_type_mapping.get(dtype, 'String')
        pydantic_fields.append(f" {feature}: {py_type}")
        sql_columns.append(f" {feature} = Column({sql_type})")

    pydantic_schema = "class InputSchema(BaseModel):\n" + "\n".join(pydantic_fields)
    sql_columns_text = "\n".join(sql_columns)

    print("\n--- Generated Pydantic Schema ---")
    print(pydantic_schema)
    print("\n--- Generated SQL Columns (for reference) ---")
    print(sql_columns_text)

    # Persist both snippets, separated by a blank line, for later copy/paste.
    with open(output_path, 'w') as f:
        f.write(pydantic_schema)
        f.write("\n\n")
        f.write(sql_columns_text)

    return pydantic_schema, sql_columns_text
# Script entry point: run schema generation with default paths when the
# file is executed directly (not when imported).
if __name__ == "__main__":
    inspect_and_generate_schemas()
|