import ast
import json
import os
import re
import sys
|
|
def load_template(template_name):
    """
    Find a template file by name and return its contents as text.

    The known template directories (".", "Regression", "Logistic_Regression",
    "Random_Forest", "Decision_Tree", "ANN") are checked first, in that order,
    relative to the current working directory. If none of them contains the
    file, the whole tree under the current directory is walked and the first
    match is used.

    Args:
        template_name: File name of the template to load.

    Returns:
        The full text of the template file.

    Raises:
        FileNotFoundError: If template_name is empty/None or no file with
            that name is found.
    """
    # A missing name is reported exactly like a missing file, so callers that
    # fall back on FileNotFoundError also cover an absent template name.
    if not template_name:
        raise FileNotFoundError(f"Template '{template_name}' not found in search paths.")

    search_dirs = [
        ".",
        "Regression",
        "Logistic_Regression",
        "Random_Forest",
        "Decision_Tree",
        "ANN",
    ]

    for d in search_dirs:
        path = os.path.join(d, template_name)
        # isfile rather than exists: a directory that happens to share the
        # template's name must be skipped, not opened.
        if os.path.isfile(path):
            with open(path, "r", encoding="utf-8") as f:
                return f.read()

    # Fallback: look anywhere below the current directory.
    for root, _dirs, files in os.walk("."):
        if template_name in files:
            with open(os.path.join(root, template_name), "r", encoding="utf-8") as f:
                return f.read()

    raise FileNotFoundError(f"Template '{template_name}' not found in search paths.")
|
|
def is_number(s):
    """
    Report whether s can be converted to a float.

    Args:
        s: Any value; typically a string or a number.

    Returns:
        True if float(s) succeeds, False otherwise. Values float() rejects by
        type (None, lists, dicts, ...) yield False instead of raising.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: a string that is not numeric text.
        # TypeError: a type float() cannot convert at all, e.g. None.
        return False
    return True
|
|
def _coerce_numeric(item):
    """Return item as an int/float if it is a number or numeric string, else None."""
    if isinstance(item, (int, float)):
        return item
    if isinstance(item, str):
        # Prefer int so "3" is emitted as 3 rather than 3.0.
        for cast in (int, float):
            try:
                return cast(item)
            except ValueError:
                continue
    return None


def _format_tuple(rendered_items):
    """Join already-rendered items into the source text of a Python tuple."""
    if len(rendered_items) == 1:
        # "(x)" is just x in Python; a one-element tuple needs the trailing comma.
        return "(" + rendered_items[0] + ",)"
    return "(" + ", ".join(rendered_items) + ")"


def format_data_map(data_dict):
    """
    Render the JSON data dictionary as Python source for a `data_map` dict.

    Each entry of data_dict maps a column name to a two-item sequence
    [dtype, range]. The dtype is written verbatim (unquoted) and the range is
    written as a tuple literal:

    * a list/tuple whose items are all numbers or numeric strings becomes a
      tuple of numbers;
    * any other list/tuple becomes a tuple of quoted strings;
    * a string that looks like "(...)" or "[...]" and parses as a literal
      list/tuple is treated like that list/tuple;
    * any other string is written verbatim, and any other value via str().

    Args:
        data_dict: Mapping of column name -> [dtype, range]; None is treated
            as an empty mapping.

    Returns:
        Multi-line source text starting with "data_map = {" and ending "}".

    Raises:
        IndexError, TypeError: If an entry's value is not indexable with at
            least two items.
    """
    lines = ["data_map = {"]

    for key, value in (data_dict or {}).items():
        dtype_str = value[0]
        val_range = value[1]

        # A range may arrive as the text of a tuple/list; recover the sequence.
        if isinstance(val_range, str) and val_range.strip().startswith(("(", "[")):
            try:
                parsed = ast.literal_eval(val_range.strip())
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                # Not a valid literal: keep the raw string and emit it verbatim.
                parsed = None
            if isinstance(parsed, (list, tuple)):
                val_range = parsed

        if isinstance(val_range, (list, tuple)):
            numbers = [_coerce_numeric(item) for item in val_range]
            if all(number is not None for number in numbers):
                val_tuple_str = _format_tuple([str(number) for number in numbers])
            else:
                val_tuple_str = _format_tuple([repr(str(item)) for item in val_range])
        elif isinstance(val_range, str):
            val_tuple_str = val_range
        else:
            val_tuple_str = str(val_range)

        # repr() quotes the key and escapes embedded quotes/backslashes, so
        # the generated line stays valid Python for any column name.
        lines.append(f" {key!r}: [{dtype_str}, {val_tuple_str}],")

    lines.append("}")
    return "\n".join(lines)
|
|
# Word aliases accepted for the prescription's "delimiter" field.
_DELIMITER_ALIASES = {
    "comma": ",",
    "tab": "\t",
    "space": " ",
    "semicolon": ";",
}


def _normalize_delimiter(raw):
    """Turn a delimiter symbol or word alias (any case) into the separator itself."""
    if not raw:
        return ","
    text = str(raw)
    return _DELIMITER_ALIASES.get(text.lower(), text)


def _build_read_command(filename, delimiter):
    """Return the pandas read call (as source text) for the given file and separator."""
    # json.dumps yields a double-quoted literal with quotes/backslashes
    # escaped, so an unusual file name cannot break the generated code.
    quoted = json.dumps(filename, ensure_ascii=False)
    if filename.lower().endswith((".xls", ".xlsx")):
        return f"pd.read_excel({quoted})"
    if delimiter == ",":
        return f"pd.read_csv({quoted})"
    if delimiter == "\t":
        # Emit the two characters backslash + t, not a literal tab.
        return f'pd.read_csv({quoted}, sep="\\t")'
    return f"pd.read_csv({quoted}, sep={delimiter!r})"


def generate_code(json_path, output_filename="solution.py", model_override=None):
    """
    Read a JSON prescription and write a Python script built from a template.

    The prescription keys read are "suggested_model", "target_variable",
    "data_file", "data_dictionary" and "delimiter". In the chosen template:

    * the `data_map = {...}` literal is replaced by one generated from
      "data_dictionary";
    * `target = "..."` is replaced with the target variable (left untouched
      when the prescription has none);
    * single-argument `pd.read_csv("...")` / `pd.read_excel("...")` calls are
      replaced by a read call for the base name of "data_file", using
      read_excel for .xls/.xlsx files and passing `sep=` for non-comma
      delimiters.

    Args:
        json_path: Path of the JSON prescription file.
        output_filename: Path of the script to write.
        model_override: Template file name to use instead of the
            prescription's "suggested_model".

    Returns:
        output_filename, after the file has been written.

    Raises:
        FileNotFoundError: If neither the requested template nor the fallback
            "BinaryRandomForest_Template.py" can be found, or json_path does
            not exist.
    """
    print(f"Reading prescription from: {json_path}")
    with open(json_path, "r", encoding="utf-8") as f:
        prescription = json.load(f)

    model_template = model_override or prescription.get("suggested_model")
    target_var = prescription.get("target_variable")
    data_file = prescription.get("data_file")
    data_dictionary = prescription.get("data_dictionary")

    print(f"Target: {target_var}")
    print(f"Model: {model_template}")
    print(f"Data: {data_file}")

    # Use the requested template when there is one and it can be found;
    # otherwise fall back to the default template.
    template_code = None
    if model_template:
        try:
            template_code = load_template(model_template)
        except FileNotFoundError:
            template_code = None
    if template_code is None:
        print(f"Warning: Template {model_template} not found. Using generic placeholder.")
        template_code = load_template("BinaryRandomForest_Template.py")

    data_map_code = format_data_map(data_dictionary or {})

    # Replacements are passed as callables so re.sub inserts the text
    # verbatim; a plain string would have its backslashes treated as escapes.
    # NOTE: the non-greedy match ends at the first "}", so a template whose
    # data_map contains nested braces would only be partially replaced.
    new_code = re.sub(
        r"data_map\s*=\s*\{.*?\}",
        lambda _match: data_map_code,
        template_code,
        flags=re.DOTALL,
    )

    if target_var is not None:
        target_line = f"target = {json.dumps(str(target_var), ensure_ascii=False)}"
        new_code = re.sub(r'target\s*=\s*".*?"', lambda _match: target_line, new_code)

    filename = os.path.basename(data_file) if data_file else "your_data_file.csv"
    delimiter = _normalize_delimiter(prescription.get("delimiter", ","))
    read_cmd = _build_read_command(filename, delimiter)

    # Only calls whose sole argument is a quoted string are rewritten.
    for pattern in (
        r"pd\.read_csv\(([\"']).*?\1\)",
        r"pd\.read_excel\(([\"']).*?\1\)",
    ):
        new_code = re.sub(pattern, lambda _match: read_cmd, new_code)

    with open(output_filename, "w", encoding="utf-8") as f:
        f.write(new_code)

    print(f"Successfully generated: {output_filename}")
    return output_filename
|
|
if __name__ == "__main__":
    # Script entry point: among the project_context_*.json files in the
    # current directory, process the one whose name sorts last; do nothing
    # when there is none.
    candidates = [
        name
        for name in os.listdir(".")
        if name.startswith("project_context_") and name.endswith(".json")
    ]
    if candidates:
        # The first element of a descending sort is simply the maximum.
        generate_code(max(candidates))
|
|