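"""AI-assisted QA runner for EngChain templates.

Walks the template tree, dynamically imports every template module, samples a
few instances from each ``template_*`` function, scores them with an LLM
reviewer, and writes a JSON summary of approved and flagged templates.
"""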
import os
import inspect
import time
import json
from importlib import util
from . import llm_reviewer
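# llm_reviewer (a sibling module, not shown here) is expected to expose
# setup_api_key() and validate_template_with_llm(code, instances, name),
# which returns a dict of *_score fields, or a falsy value on failure.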

# The root directory containing all engineering branch templates
TEMPLATES_ROOT_DIR = "data/templates/branches"

def discover_template_files(root_dir: str):
    """Finds all Python template files recursively."""
    template_files = []
    for dirpath, _, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename.endswith(".py") and filename not in ["__init__.py", "constants.py"]:
                template_files.append(os.path.join(dirpath, filename))
    return template_files
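# Usage sketch (paths hypothetical):
#   discover_template_files("data/templates/branches")
#   -> ["data/templates/branches/mechanical/beams.py", ...]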

def import_templates_from_path(file_path: str):
    """Dynamically imports a module and returns its template_* functions."""
    module_name = os.path.splitext(os.path.basename(file_path))[0]
    spec = util.spec_from_file_location(module_name, file_path)
    if not spec or not spec.loader:
        return []
    module = util.module_from_spec(spec)
    try:
        spec.loader.exec_module(module)
    except Exception as exc:
        # A template module that fails to import should not abort the whole
        # QA run; report it and move on.
        print(f"Failed to import {file_path}: {exc}")
        return []

    template_functions = []
    for name, func in inspect.getmembers(module, inspect.isfunction):
        if name.startswith("template_"):
            template_functions.append(func)

    return template_functions
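# For reference, a minimal function that import_templates_from_path would pick
# up might look like this sketch (hypothetical; real templates return
# randomized problem instances for their engineering branch):
#
#     def template_beam_deflection():
#         """Return one randomized problem instance."""
#         return {"question": "...", "answer": 42.0}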

def run_validation_on_all_templates():
    """

    Discovers, validates, and reports on all EngChain templates.

    """
    llm_reviewer.setup_api_key()
    template_files = discover_template_files(TEMPLATES_ROOT_DIR)
    
    if not template_files:
        print(f"No template files found in '{TEMPLATES_ROOT_DIR}'. Exiting.")
        return

    print(f"Found {len(template_files)} template files to validate.")
    
    approved_templates = []
    flagged_templates = []
    full_report = {}

    for i, file_path in enumerate(template_files):
        print("\n" + "="*80)
        print(f"Processing file {i+1}/{len(template_files)}: {file_path}")
        print("="*80)

        template_functions = import_templates_from_path(file_path)
        if not template_functions:
            print(f"No template functions found in {file_path}. Skipping.")
            continue

        with open(file_path, 'r', encoding='utf-8') as f:
            template_code = f.read()

        for template_func in template_functions:
            template_name = template_func.__name__
            print(f"\n--- Validating template: {template_name} ---")
            
            # Generate a few sample instances and hand them, along with the
            # template source, to the LLM reviewer for scoring.
            instances = [template_func() for _ in range(3)]
            evaluation = llm_reviewer.validate_template_with_llm(template_code, instances, template_name)
            
            if not evaluation:
                print(f"Validation failed for {template_name}. Flagging for human review.")
                flagged_templates.append(f"{file_path} -> {template_name}")
                full_report[template_name] = {"status": "FAILED_TO_EVALUATE", "details": "No valid response from LLM."}
                continue

            # Approve only when every rubric score meets the threshold; missing
            # scores default to 0 so incomplete evaluations are flagged.
            threshold = 4
            scores = [
                evaluation.get('physical_plausibility_score', 0),
                evaluation.get('mathematical_correctness_score', 0),
                evaluation.get('pedagogical_clarity_score', 0),
            ]

            is_approved = all(s >= threshold for s in scores)
            
            report_entry = {
                "file_path": file_path,
                "status": "APPROVED" if is_approved else "FLAGGED",
                "details": evaluation
            }
            full_report[template_name] = report_entry

            if is_approved:
                print("Result: APPROVED")
                approved_templates.append(f"{file_path} -> {template_name}")
            else:
                print("Result: FLAGGED FOR HUMAN REVIEW")
                flagged_templates.append(f"{file_path} -> {template_name}")

            # Add a small delay to respect potential API rate limits
            time.sleep(2)

    # Define the output directory and create it if it doesn't exist
    output_dir = os.path.join("evaluation", "qa_validator", "results")
    os.makedirs(output_dir, exist_ok=True)

    # Define the full path for the report file
    report_filename = os.path.join(output_dir, "qa_summary_report.json")
    
    # Save the detailed report to the specified path
    with open(report_filename, 'w', encoding='utf-8') as f:
        json.dump(full_report, f, indent=4)
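    # Each entry in the report maps a template name to its review record,
    # roughly (example name hypothetical):
    #   "template_beam_deflection": {
    #       "file_path": "data/templates/branches/.../beams.py",
    #       "status": "APPROVED" | "FLAGGED" | "FAILED_TO_EVALUATE",
    #       "details": { ...the full LLM evaluation... }
    #   }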
    print(f"\nDetailed report saved to '{report_filename}'")

    # Print the final summary
    print("\n" + "="*80)
    print("AI-ASSISTED QA FINAL SUMMARY")
    print("="*80)
    print(f"\nAPPROVED TEMPLATES ({len(approved_templates)}):")
    if approved_templates:
        for t in approved_templates:
            print(f"- {t}")
    else:
        print("None")
        
    print(f"\nFLAGGED FOR HUMAN REVIEW ({len(flagged_templates)}):")
    if flagged_templates:
        for t in flagged_templates:
            print(f"- {t}")
    else:
        print("None")
    print("="*80)


if __name__ == "__main__":
    run_validation_on_all_templates()