Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import pickle | |
def inspect_and_generate_schemas(
    csv_path='01_merged_data.csv',
    output_path='generated_schemas_v2.txt',
):
    """Generate a Pydantic schema and SQL column stubs from a CSV's columns.

    Loads ``csv_path`` with pandas, drops a fixed list of excluded columns,
    and maps the dtype name of every remaining column to a Python annotation
    and a ``Column(...)`` type name. The generated text is printed to stdout
    and written to ``output_path``.

    Args:
        csv_path: Path of the CSV file to inspect. Defaults to the file name
            the script originally hard-coded.
        output_path: Path of the text file that receives the generated
            schema followed by the SQL column lines.

    Returns:
        None. The results are the printed output and the written file.
    """
    # Load Data
    print("Loading data...")
    df = pd.read_csv(csv_path)

    # Columns that are deliberately left out of the generated schemas.
    excluded_cols = {
        'id_employee', 'eval_number', 'code_sondage',
        'a_quitte_l_entreprise', 'turnover',
    }
    features = [c for c in df.columns if c not in excluded_cols]
    print(f"Selected features: {features}")

    # dtype name -> (Python annotation, SQL column type). Keeping both targets
    # in one table guarantees the two mappings cannot drift apart.
    dtype_to_types = {
        'int64': ('int', 'Integer'),
        'float64': ('float', 'Float'),
        'object': ('str', 'String'),
        'bool': ('bool', 'Boolean'),
    }
    # Any dtype not listed above is treated as text.
    fallback_types = ('str', 'String')

    pydantic_fields = []
    sql_columns = []
    for feature in features:
        py_type, sql_type = dtype_to_types.get(
            str(df[feature].dtype), fallback_types
        )
        pydantic_fields.append(f" {feature}: {py_type}")
        sql_columns.append(f" {feature} = Column({sql_type})")

    # A class statement with no body is a syntax error, so emit ``pass`` when
    # every column was excluded.
    class_body = pydantic_fields or [" pass"]
    pydantic_schema = "class InputSchema(BaseModel):\n" + "\n".join(class_body)
    sql_block = "\n".join(sql_columns)

    print("\n--- Generated Pydantic Schema ---")
    print(pydantic_schema)
    print("\n--- Generated SQL Columns (for reference) ---")
    print(sql_block)

    # Save to file. The encoding is explicit so column names containing
    # non-ASCII characters are written the same way on every platform.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(f"{pydantic_schema}\n\n{sql_block}")
# Script entry point: generate the schemas only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    inspect_and_generate_schemas()