import os
import inspect
import time
import json
from importlib import util

from . import llm_reviewer

# The root directory containing all engineering branch templates
TEMPLATES_ROOT_DIR = "data/templates/branches"


def discover_template_files(root_dir: str):
    """Finds all Python template files recursively."""
    template_files = []
    for dirpath, _, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename.endswith(".py") and filename not in ["__init__.py", "constants.py"]:
                template_files.append(os.path.join(dirpath, filename))
    return template_files
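
# For reference, discover_template_files expects a layout like the one below.
# The branch and file names are hypothetical; only the root path and the skip
# rules ("__init__.py", "constants.py") come from this module.
#
#   data/templates/branches/
#       mechanical/
#           statics.py      <- scanned
#           constants.py    <- skipped
#       electrical/
#           __init__.py     <- skipped
#           circuits.py     <- scanned
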
def import_templates_from_path(file_path: str):
    """Dynamically imports a module and finds all template functions."""
    module_name = os.path.splitext(os.path.basename(file_path))[0]
    spec = util.spec_from_file_location(module_name, file_path)
    if not spec or not spec.loader:
        return []
    module = util.module_from_spec(spec)
    spec.loader.exec_module(module)

    template_functions = []
    for name, func in inspect.getmembers(module, inspect.isfunction):
        if name.startswith("template_"):
            template_functions.append(func)
    return template_functions
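
# Illustrative sketch of the contract this loader relies on: a template module
# exposes zero-argument functions named "template_*", each returning one
# randomized problem instance. The example below is hypothetical and kept
# commented out so this validator module defines no templates of its own;
# real templates live under TEMPLATES_ROOT_DIR. The dict shape is an
# assumption, not taken from this file.
#
#   def template_beam_deflection():
#       """Return one randomized beam-deflection problem instance."""
#       import random
#       load_kn = round(random.uniform(1.0, 10.0), 2)
#       return {
#           "question": f"A cantilever beam carries a point load of {load_kn} kN ...",
#           "answer": load_kn,  # placeholder value for illustration
#       }
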
def run_validation_on_all_templates():
    """Discovers, validates, and reports on all EngChain templates."""
    llm_reviewer.setup_api_key()

    template_files = discover_template_files(TEMPLATES_ROOT_DIR)
    if not template_files:
        print(f"No template files found in '{TEMPLATES_ROOT_DIR}'. Exiting.")
        return

    print(f"Found {len(template_files)} template files to validate.")

    approved_templates = []
    flagged_templates = []
    full_report = {}

    for i, file_path in enumerate(template_files):
        print("\n" + "=" * 80)
        print(f"Processing file {i + 1}/{len(template_files)}: {file_path}")
        print("=" * 80)

        template_functions = import_templates_from_path(file_path)
        if not template_functions:
            print(f"No template functions found in {file_path}. Skipping.")
            continue

        # The template source is sent to the reviewer alongside sample output.
        with open(file_path, 'r', encoding='utf-8') as f:
            template_code = f.read()

        for template_func in template_functions:
            template_name = template_func.__name__
            print(f"\n--- Validating template: {template_name} ---")

            # Generate a few instances so the reviewer sees the randomization.
            instances = [template_func() for _ in range(3)]

            evaluation = llm_reviewer.validate_template_with_llm(template_code, instances, template_name)

            if not evaluation:
                print(f"Validation failed for {template_name}. Flagging for human review.")
                flagged_templates.append(f"{file_path} -> {template_name}")
                full_report[template_name] = {"status": "FAILED_TO_EVALUATE", "details": "No valid response from LLM."}
                continue
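
            # Assumption: the reviewer returns a mapping containing the three
            # rubric scores read below, apparently on a 1-5 scale; the scale
            # is inferred from the approval threshold of 4, while the key
            # names come from this file.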
            threshold = 4
            scores = [
                evaluation.get('physical_plausibility_score', 0),
                evaluation.get('mathematical_correctness_score', 0),
                evaluation.get('pedagogical_clarity_score', 0)
            ]
            is_approved = all(s >= threshold for s in scores)

            # Note: the report is keyed by template name, so names are assumed
            # to be unique across files; the file path is recorded per entry.
            report_entry = {
                "file_path": file_path,
                "status": "APPROVED" if is_approved else "FLAGGED",
                "details": evaluation
            }
            full_report[template_name] = report_entry

            if is_approved:
                print("Result: APPROVED")
                approved_templates.append(f"{file_path} -> {template_name}")
            else:
                print("Result: FLAGGED FOR HUMAN REVIEW")
                flagged_templates.append(f"{file_path} -> {template_name}")

            # Add a small delay to respect potential API rate limits
            time.sleep(2)

    # Define the output directory and create it if it doesn't exist
    output_dir = os.path.join("evaluation", "qa_validator", "results")
    os.makedirs(output_dir, exist_ok=True)

    # Define the full path for the report file
    report_filename = os.path.join(output_dir, "qa_summary_report.json")

    # Save the detailed report to the specified path
    with open(report_filename, 'w', encoding='utf-8') as f:
        json.dump(full_report, f, indent=4)
    print(f"\nDetailed report saved to '{report_filename}'")

    # Print the final summary
    print("\n" + "=" * 80)
    print("AI-ASSISTED QA FINAL SUMMARY")
    print("=" * 80)

    print(f"\nAPPROVED TEMPLATES ({len(approved_templates)}):")
    if approved_templates:
        for t in approved_templates:
            print(f"- {t}")
    else:
        print("None")

    print(f"\nFLAGGED FOR HUMAN REVIEW ({len(flagged_templates)}):")
    if flagged_templates:
        for t in flagged_templates:
            print(f"- {t}")
    else:
        print("None")

    print("=" * 80)
if __name__ == "__main__":
    run_validation_on_all_templates()