"""Review helper for a Hugging Face model repository: checks adapter,
tokenizer, and inference configs, smoke-tests model loading and generation,
and prints repository housekeeping recommendations."""

import os
import json
import yaml
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import HfApi


def load_json_config(file_path):
    """Safely load JSON configuration files"""
    try:
        with open(file_path, 'r') as f:
            return json.load(f)
    except Exception as e:
        print(f"❌ Error loading {file_path}: {e}")
        return None


def review_adapter_config(config_path):
    """
    Comprehensive review of adapter configuration
    """
    print("\n🔍 Adapter Configuration Analysis:")
    config = load_json_config(config_path)

    if config is None:
        return

    # 'adapter_type'/'reduction_factor' are AdapterHub-style keys;
    # PEFT-style configs use 'peft_type' instead, checked separately below.
    checks = [
        ("Adapter Type", config.get('adapter_type')),
        ("Base Model", config.get('base_model_name_or_path')),
        ("Reduction Factor", config.get('reduction_factor')),
        ("Target Modules", config.get('target_modules')),
    ]

    for label, value in checks:
        status = "✅" if value is not None else "❌"
        print(f"{status} {label}: {value}")

    if 'peft_type' in config:
        print(f"✅ PEFT Type: {config['peft_type']}")


def review_tokenizer_config(config_path):
    """
    Comprehensive review of tokenizer configuration
    """
    print("\n🔍 Tokenizer Configuration Analysis:")
    config = load_json_config(config_path)

    if config is None:
        return

    tokenizer_checks = [
        ("Vocabulary Size", config.get('vocab_size')),
        ("Padding Side", config.get('padding_side')),
        ("Truncation Side", config.get('truncation_side')),
        ("Model Max Length", config.get('model_max_length')),
        ("Special Tokens", config.get('special_tokens_map_file')),
    ]

    for label, value in tokenizer_checks:
        status = "✅" if value is not None else "❌"
        print(f"{status} {label}: {value}")


def check_inference_yaml(yaml_path):
    """
    Review inference configuration
    """
    print("\n🔍 Inference Configuration Analysis:")
    try:
        with open(yaml_path, 'r') as f:
            inference_config = yaml.safe_load(f)

        print("Inference Configuration Details:")
        print(json.dumps(inference_config, indent=2))
    except Exception as e:
        print(f"❌ Error reading inference YAML: {e}")


def model_loading_test(model_id):
    """
    Test model loading and basic generation
    """
    print("\n🧪 Model Loading and Generation Test:")
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        print("✅ Tokenizer Loaded Successfully")

        model = AutoModelForCausalLM.from_pretrained(model_id)
        print("✅ Model Loaded Successfully")

        test_prompt = "Explain machine learning in simple terms:"
        input_ids = tokenizer.encode(test_prompt, return_tensors="pt")

        output = model.generate(input_ids, max_length=100, num_return_sequences=1)
        generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

        print("✅ Basic Generation Test Passed")
        print("\n📝 Generated Sample:")
        print(generated_text)
    except Exception as e:
        print(f"❌ Model Loading/Generation Failed: {e}")


def optimize_repository_structure(model_id):
    """
    Provide recommendations for repository optimization
    """
    # model_id is currently unused; kept for interface symmetry with the
    # other checks, which all take the repository id.
    print("\n🛠️ Repository Optimization Recommendations:")

    print("Checkpoint Management:")
    print("1. Consider keeping only the latest checkpoint (36)")
    print("2. Archive or remove older checkpoints to reduce repository size")

    print("\nConfiguration File Optimization:")
    print("1. Ensure consistent naming across checkpoint configurations")
    print("2. Verify that the latest checkpoint has the most up-to-date configs")

    print("\nDocumentation Recommendations:")
    print("1. Update README.md with:")
    print("   - Model architecture details")
    print("   - Training methodology")
    print("   - Intended use cases")
    print("   - Performance metrics")


def main():
    model_id = "mycholpath/RA-Mistral-7B"
    base_path = "."

    adapter_config_path = os.path.join(base_path, "adapter_config.json")
    tokenizer_config_path = os.path.join(base_path, "tokenizer_config.json")
    inference_yaml_path = os.path.join(base_path, "inference.yaml")

    review_adapter_config(adapter_config_path)
    review_tokenizer_config(tokenizer_config_path)
    check_inference_yaml(inference_yaml_path)
    model_loading_test(model_id)
    optimize_repository_structure(model_id)
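    # Optional extras (hedged sketches defined above); each is
    # exception-guarded, and adapter_loading_test additionally needs `peft`.
    review_generation_params(inference_yaml_path)
    adapter_loading_test(model_id)
    list_repository_files(model_id)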


if __name__ == "__main__":
    main()