# prepare_model.py import os import json import shutil from pathlib import Path def create_minimal_model_structure(model_path="."): """ Create minimal required files for Hugging Face model upload """ # Create directories if they don't exist os.makedirs(model_path, exist_ok=True) # 1. Check for model files model_files = list(Path(model_path).glob("*.safetensors")) + \ list(Path(model_path).glob("*.bin")) + \ list(Path(model_path).glob("pytorch_model*.bin")) if not model_files: print("⚠️ Warning: No model weight files found!") print(" Expected: *.safetensors, *.bin, or pytorch_model*.bin") # 2. Create config.json if missing config_path = Path(model_path) / "config.json" if not config_path.exists(): print("📝 Creating minimal config.json...") config = { "_name_or_path": "abdelac/Mistral_Test", "architectures": ["MistralForCausalLM"], # Adjust based on your model "model_type": "mistral", "torch_dtype": "float16", "transformers_version": "4.35.0" } with open(config_path, "w") as f: json.dump(config, f, indent=2) # 3. Create tokenizer files if missing tokenizer_config_path = Path(model_path) / "tokenizer_config.json" if not tokenizer_config_path.exists(): print("📝 Creating tokenizer_config.json...") tokenizer_config = { "bos_token": "", "eos_token": "", "pad_token": "", "unk_token": "", "model_max_length": 32768, "clean_up_tokenization_spaces": False } with open(tokenizer_config_path, "w") as f: json.dump(tokenizer_config, f, indent=2) # 4. Create special_tokens_map.json special_tokens_path = Path(model_path) / "special_tokens_map.json" if not special_tokens_path.exists(): print("📝 Creating special_tokens_map.json...") special_tokens = { "bos_token": "", "eos_token": "", "pad_token": "", "unk_token": "" } with open(special_tokens_path, "w") as f: json.dump(special_tokens, f, indent=2) # 5. Create README.md readme_path = Path(model_path) / "README.md" if not readme_path.exists(): print("📝 Creating README.md...") readme_content = """--- language: - en license: apache-2.0 tags: - generated_from_trainer - mistral - text-generation --- # Model Card ## Model Description This model is a fine-tuned version of Mistral. ## Usage ```python from transformers import AutoModelForCausalLM, AutoTokenizer model = AutoModelForCausalLM.from_pretrained("abdelac/Mistral_Test") tokenizer = AutoTokenizer.from_pretrained("abdelac/Mistral_Test") prompt = "Explain machine learning" inputs = tokenizer(prompt, return_tensors="pt") outputs = model.generate(**inputs, max_new_tokens=100) print(tokenizer.decode(outputs[0], skip_special_tokens=True))