# NOTE: removed non-code page residue ("Spaces:" / "Sleeping") left over from a web extraction
| # prepare_model.py | |
| import os | |
| import json | |
| import shutil | |
| from pathlib import Path | |
| def create_minimal_model_structure(model_path="."): | |
| """ | |
| Create minimal required files for Hugging Face model upload | |
| """ | |
| # Create directories if they don't exist | |
| os.makedirs(model_path, exist_ok=True) | |
| # 1. Check for model files | |
| model_files = list(Path(model_path).glob("*.safetensors")) + \ | |
| list(Path(model_path).glob("*.bin")) + \ | |
| list(Path(model_path).glob("pytorch_model*.bin")) | |
| if not model_files: | |
| print("β οΈ Warning: No model weight files found!") | |
| print(" Expected: *.safetensors, *.bin, or pytorch_model*.bin") | |
| # 2. Create config.json if missing | |
| config_path = Path(model_path) / "config.json" | |
| if not config_path.exists(): | |
| print("π Creating minimal config.json...") | |
| config = { | |
| "_name_or_path": "abdelac/Mistral_Test", | |
| "architectures": ["MistralForCausalLM"], # Adjust based on your model | |
| "model_type": "mistral", | |
| "torch_dtype": "float16", | |
| "transformers_version": "4.35.0" | |
| } | |
| with open(config_path, "w") as f: | |
| json.dump(config, f, indent=2) | |
| # 3. Create tokenizer files if missing | |
| tokenizer_config_path = Path(model_path) / "tokenizer_config.json" | |
| if not tokenizer_config_path.exists(): | |
| print("π Creating tokenizer_config.json...") | |
| tokenizer_config = { | |
| "bos_token": "<s>", | |
| "eos_token": "</s>", | |
| "pad_token": "</s>", | |
| "unk_token": "<unk>", | |
| "model_max_length": 32768, | |
| "clean_up_tokenization_spaces": False | |
| } | |
| with open(tokenizer_config_path, "w") as f: | |
| json.dump(tokenizer_config, f, indent=2) | |
| # 4. Create special_tokens_map.json | |
| special_tokens_path = Path(model_path) / "special_tokens_map.json" | |
| if not special_tokens_path.exists(): | |
| print("π Creating special_tokens_map.json...") | |
| special_tokens = { | |
| "bos_token": "<s>", | |
| "eos_token": "</s>", | |
| "pad_token": "</s>", | |
| "unk_token": "<unk>" | |
| } | |
| with open(special_tokens_path, "w") as f: | |
| json.dump(special_tokens, f, indent=2) | |
| # 5. Create README.md | |
| readme_path = Path(model_path) / "README.md" | |
| if not readme_path.exists(): | |
| print("π Creating README.md...") | |
| readme_content = """--- | |
| language: | |
| - en | |
| license: apache-2.0 | |
| tags: | |
| - generated_from_trainer | |
| - mistral | |
| - text-generation | |
| --- | |
| # Model Card | |
| ## Model Description | |
| This model is a fine-tuned version of Mistral. | |
| ## Usage | |
| ```python | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| model = AutoModelForCausalLM.from_pretrained("abdelac/Mistral_Test") | |
| tokenizer = AutoTokenizer.from_pretrained("abdelac/Mistral_Test") | |
| prompt = "Explain machine learning" | |
| inputs = tokenizer(prompt, return_tensors="pt") | |
| outputs = model.generate(**inputs, max_new_tokens=100) | |
| print(tokenizer.decode(outputs[0], skip_special_tokens=True)) |