eesfeg commited on
Commit
1e639fb
·
1 Parent(s): f9cb048

Add application file

Browse files
api_fastapi.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # api_fastapi.py
2
+ from fastapi import FastAPI, HTTPException
3
+ from pydantic import BaseModel
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer
5
+ import torch
6
+ import uvicorn
7
+
8
+ app = FastAPI(title="Mistral API")
9
+
10
class ChatRequest(BaseModel):
    """Request payload for the /chat and /batch_chat endpoints."""
    prompt: str               # user prompt; wrapped in [INST]...[/INST] by the handler
    max_tokens: int = 500     # cap on newly generated tokens (max_new_tokens)
    temperature: float = 0.7  # sampling temperature; handler always sets do_sample=True
14
+
15
# Global model instance — populated once at startup, None until loading succeeds
MODEL = None
TOKENIZER = None

@app.on_event("startup")
async def load_model():
    """Load the tokenizer and 8-bit quantized model at application startup.

    On failure the error is only printed: MODEL stays None and /chat keeps
    returning 503 until the process is restarted.
    """
    global MODEL, TOKENIZER
    try:
        TOKENIZER = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
        MODEL = AutoModelForCausalLM.from_pretrained(
            "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            torch_dtype=torch.float16,
            device_map="auto",
            # NOTE(review): the bare load_in_8bit flag is deprecated in newer
            # transformers in favor of BitsAndBytesConfig — confirm pinned version
            load_in_8bit=True
        )
        print("Model loaded successfully!")
    except Exception as e:
        print(f"Error loading model: {e}")
33
+
34
@app.get("/health")
async def health():
    """Liveness probe: reports service status and whether the model is loaded."""
    model_ready = MODEL is not None
    return {"status": "healthy", "model_loaded": model_ready}
37
+
38
@app.post("/chat")
async def chat_completion(request: ChatRequest):
    """Generate one chat completion for `request.prompt`.

    Returns the decoded text after the final [/INST] marker and the count of
    newly generated tokens. Raises 503 if the model is not loaded yet and 500
    on any generation failure.
    """
    if MODEL is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    try:
        # Format prompt using the Llama/Mistral instruct convention
        formatted_prompt = f"[INST] {request.prompt} [/INST]"

        # Tokenize and move tensors to wherever the model was placed
        inputs = TOKENIZER(formatted_prompt, return_tensors="pt").to(MODEL.device)

        # Generate. NOTE(review): this is a blocking call inside an async
        # handler, so it stalls the event loop for the whole generation —
        # consider run_in_executor for production use.
        with torch.no_grad():
            outputs = MODEL.generate(
                **inputs,
                max_new_tokens=request.max_tokens,
                temperature=request.temperature,  # must be > 0 since do_sample=True
                do_sample=True,
                top_p=0.95
            )

        # Decode the full sequence, then keep only the text after the last [/INST]
        response = TOKENIZER.decode(outputs[0], skip_special_tokens=True)
        response = response.split("[/INST]")[-1].strip()

        return {
            "response": response,
            # new tokens = total output length minus prompt length
            "tokens_generated": len(outputs[0]) - len(inputs.input_ids[0])
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
71
+
72
@app.post("/batch_chat")
async def batch_chat(requests: list[ChatRequest]):
    """Run the /chat handler sequentially over a list of requests."""
    responses = [await chat_completion(req) for req in requests]
    return {"responses": responses}
80
+
81
if __name__ == "__main__":
    # Bind on all interfaces, port 8000 (matches EXPOSE 8000 in the Dockerfile)
    uvicorn.run(app, host="0.0.0.0", port=8000)
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+ from peft import PeftModel
5
+
6
BASE_MODEL = "abdelac/tinyllama"       # base checkpoint on the HF Hub
LORA_MODEL = "abdelac/tinyllama-lora"  # LoRA adapter weights trained by train.py

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# Load base model (4-bit for low RAM)
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    load_in_4bit=True,  # NOTE(review): bare flag deprecated in newer transformers; prefer BitsAndBytesConfig
    device_map="auto",
    torch_dtype=torch.float16
)

# Load LoRA adapters on top of the quantized base model
model = PeftModel.from_pretrained(base_model, LORA_MODEL)
model.eval()  # inference only — disables dropout
23
+
24
def chat(prompt, max_tokens=200, temperature=0.7):
    """Generate a completion for `prompt`.

    Returns the decoded text of the first sequence (prompt text included,
    special tokens stripped).
    """
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)

    sampling_options = dict(
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=0.9,
    )
    with torch.no_grad():
        generated = model.generate(**encoded, **sampling_options)

    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
37
+
38
# Gradio UI — the three inputs map positionally onto chat(prompt, max_tokens, temperature)
demo = gr.Interface(
    fn=chat,
    inputs=[
        gr.Textbox(lines=4, label="Prompt"),
        gr.Slider(50, 500, value=200, label="Max tokens"),
        gr.Slider(0.1, 1.5, value=0.7, label="Temperature")
    ],
    outputs="text",
    title="TinyLlama Fine-Tuned (LoRA)",
    description="TinyLlama loaded with LoRA adapters for domain-specific inference"
)

demo.launch()
app_gradio.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app_gradio.py
2
+ import gradio as gr
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ import torch
5
+
6
class MistralApp:
    """Lazy-loading chat wrapper around the TinyLlama chat model for the Gradio UI."""

    def __init__(self):
        # Preferred device for input tensors; the model itself is placed by
        # device_map="auto" during load_model().
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = None      # loaded on demand via load_model()
        self.tokenizer = None

    def load_model(self):
        """Load tokenizer + 8-bit model once; returns a status string for the UI."""
        if self.model is None:
            print("Loading model...")
            self.tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
            self.model = AutoModelForCausalLM.from_pretrained(
                "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                torch_dtype=torch.float16,
                device_map="auto",
                load_in_8bit=True  # Reduce memory usage
            )
            print("Model loaded!")
        return "Model loaded successfully!"

    def respond(self, message, history):
        """Generate a reply to `message` given `history` as [(user, assistant), ...]."""
        # Flatten the chat history into the [INST] ... [/INST] convention
        formatted_prompt = self.format_chat(message, history)

        # NOTE(review): inputs go to self.device while the model was dispatched
        # by device_map="auto" — fine single-GPU, confirm for multi-device setups.
        inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=512,
                temperature=0.7,
                do_sample=True
            )

        # Keep only the text after the final [/INST] marker (the new reply)
        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return response.split("[/INST]")[-1].strip()

    def format_chat(self, message, history):
        """Serialize past (user, assistant) turns plus the new message into one prompt."""
        prompt = ""
        for user_msg, assistant_msg in history:
            prompt += f"[INST] {user_msg} [/INST] {assistant_msg} "
        prompt += f"[INST] {message} [/INST]"
        return prompt
48
+
49
# Create Gradio interface
app = MistralApp()

with gr.Blocks(title="Mistral Chat Assistant") as demo:
    gr.Markdown("# 🤖 Mistral 7B Chat Assistant")

    with gr.Row():
        with gr.Column(scale=1):
            load_btn = gr.Button("Load Model", variant="primary")
            status = gr.Textbox(label="Status", interactive=False)

        with gr.Column(scale=4):
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(label="Your message", placeholder="Type your message here...")
            send_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear")

    # Connect events
    load_btn.click(app.load_model, outputs=status)

    def user(user_message, history):
        # Stage the user's turn (assistant slot None) and clear the textbox.
        return "", history + [[user_message, None]]

    def bot(history):
        # Fill in the assistant slot of the last staged turn.
        response = app.respond(history[-1][0], history[:-1])
        history[-1][1] = response
        return history

    # Enter key and Send button behave identically: stage the turn, then generate.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    send_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear_btn.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
basic_inference.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# basic_inference.py
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load from local directory or Hugging Face
model_path = "./tinyllama"  # or "mistralai/Mistral-7B-Instruct-v0.1"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map="auto" if device == "cuda" else None,
)

# Move to device ONLY when accelerate's device_map did not place the model.
# FIX: the original called model.to(device) exactly when device_map="auto"
# HAD been used (contradicting its own comment) — redundant at best, and an
# error for models with offloaded modules.
if device == "cuda" and getattr(model, "hf_device_map", None) is None:
    model = model.to(device)
data.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "instruction": "Explain fake news",
4
+ "output": "Fake news is false or misleading information presented as news."
5
+ },
6
+ {
7
+ "instruction": "What is AI?",
8
+ "output": "Artificial Intelligence is the simulation of human intelligence by machines."
9
+ }
10
+ ]
deepseek_dockerfile_20251226_58f521.dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Dockerfile
2
+ FROM python:3.9-slim
3
+
4
+ WORKDIR /app
5
+
6
+ # Install dependencies
7
+ COPY requirements.txt .
8
+ RUN pip install --no-cache-dir -r requirements.txt
9
+
10
+ # Copy application
11
+ COPY . .
12
+
13
+ # Expose port
14
+ EXPOSE 8000
15
+
16
+ # Run the application
17
+ CMD ["python", "api_fastapi.py"]
dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Dockerfile
2
+ FROM python:3.9-slim
3
+
4
+ WORKDIR /app
5
+
6
+ # Install dependencies
7
+ COPY requirements.txt .
8
+ RUN pip install --no-cache-dir -r requirements.txt
9
+
10
+ # Copy application
11
+ COPY . .
12
+
13
+ # Expose port
14
+ EXPOSE 8000
15
+
16
+ # Run the application
17
+ CMD ["python", "api_fastapi.py"]
dockerfile.dockerfile ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # requirements.txt
2
+ torch==2.1.0
3
+ transformers==4.35.0
4
+ accelerate==0.24.1
5
+ fastapi==0.104.1
6
+ uvicorn[standard]==0.24.0
7
+ gradio==4.8.0
8
+ sentencepiece==0.1.99
9
+ bitsandbytes==0.41.1
inference.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # inference.py
2
+ import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
4
+
5
class MistralChat:
    """Chat wrapper: loads an instruct model once, then generates or streams replies."""

    def __init__(self, model_path="TinyLlama/TinyLlama-1.1B-Chat-v1.0"):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        print("Loading model...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
            device_map="auto" if self.device == "cuda" else None,
            trust_remote_code=True  # NOTE(review): executes repo code — trusted repos only
        )

        # FIX: the original unconditionally called .to(self.device) on CUDA even
        # though device_map="auto" had already placed the model — redundant, and
        # an error for offloaded modules. Only move when accelerate did not dispatch.
        if self.device == "cuda" and getattr(self.model, "hf_device_map", None) is None:
            self.model = self.model.to(self.device)

        print("Model loaded successfully!")

    def generate(self, prompt, max_length=500, temperature=0.7):
        """Return the assistant's reply to `prompt`.

        Args:
            prompt: User message (wrapped in [INST] tags internally).
            max_length: Maximum number of NEW tokens to generate.
            temperature: Sampling temperature (must be > 0; do_sample is on).
        """
        # Format for instruct models
        formatted_prompt = f"[INST] {prompt} [/INST]"

        inputs = self.tokenizer(formatted_prompt, return_tensors="pt")
        if self.device == "cuda":
            inputs = inputs.to(self.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_length,
                temperature=temperature,
                do_sample=True,
                top_p=0.95,
                pad_token_id=self.tokenizer.eos_token_id  # silence missing-pad warning
            )

        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Extract only the assistant's response (text after the first [/INST])
        if "[/INST]" in response:
            response = response.split("[/INST]")[1].strip()

        return response

    def chat_stream(self, prompt):
        """Stream the response token by token (prints via TextStreamer, returns nothing)."""
        formatted_prompt = f"[INST] {prompt} [/INST]"
        inputs = self.tokenizer(formatted_prompt, return_tensors="pt")

        streamer = TextStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)

        if self.device == "cuda":
            inputs = inputs.to(self.device)

        _ = self.model.generate(**inputs, streamer=streamer, max_new_tokens=500)
59
+
60
# Usage example — loads the model once, then demonstrates both generation modes.
if __name__ == "__main__":
    chat = MistralChat()

    # Single response (returned as a string)
    response = chat.generate("Explain quantum computing in simple terms")
    print("Response:", response)

    # Streaming response (tokens are printed as they are produced)
    print("\nStreaming response:")
    chat.chat_stream("Write a short poem about AI")
optimization.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# optimization.py — three independent serving-optimization examples.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# 1. Use pipeline for simplicity
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    model_kwargs={
        "torch_dtype": torch.float16,
        "device_map": "auto",
        "load_in_4bit": True
    },
    tokenizer="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
)

# 2. Use vLLM for high-throughput (install: pip install vllm)
from vllm import LLM, SamplingParams

# FIX: the model id had a stray leading "m" ("mTinyLlama/...") and would 404 on the Hub.
llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
sampling_params = SamplingParams(temperature=0.7, max_tokens=500)
outputs = llm.generate(["Hello, how are you?"], sampling_params)

# 3. Cache model responses
import hashlib
from functools import lru_cache

@lru_cache(maxsize=1000)
def cached_generation(prompt, max_tokens=500):
    # NOTE(review): generation samples (temperature/top_p), so the first random
    # response is frozen per (prompt, max_tokens) — acceptable for demos only.
    return pipe(prompt, max_new_tokens=max_tokens)[0]['generated_text']
optimized_loading.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# optimized_loading.py — two ALTERNATIVE low-memory loading recipes.
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from accelerate import infer_auto_device_map  # NOTE(review): imported but never used below

# 4-bit quantization (reduces memory by 75%)
model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    load_in_4bit=True,  # or load_in_8bit=True for 8-bit
    device_map="auto",
    torch_dtype=torch.float16,
)

# CPU offloading (for low RAM)
# NOTE(review): this second load rebinds `model`, discarding the 4-bit model
# above — the two snippets are alternatives, not meant to run back to back.
model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    device_map="auto",
    offload_folder="offload",      # spill weights that don't fit to disk here
    offload_state_dict=True,
)
prepare_model.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # prepare_model.py
2
+ import os
3
+ import json
4
+ import shutil
5
+ from pathlib import Path
6
+
7
def create_minimal_model_structure(model_path="."):
    """
    Create minimal required files for a Hugging Face model upload.

    Ensures config.json, tokenizer_config.json, special_tokens_map.json and
    README.md exist in `model_path`, writing Mistral-style defaults for any
    that are missing. Existing files are never overwritten; missing weight
    files only produce a warning.

    Args:
        model_path: Directory that holds (or will hold) the model files.
    """
    # Create directories if they don't exist
    os.makedirs(model_path, exist_ok=True)

    # 1. Check for model weight files (warn only — upload still proceeds)
    model_files = list(Path(model_path).glob("*.safetensors")) + \
                  list(Path(model_path).glob("*.bin")) + \
                  list(Path(model_path).glob("pytorch_model*.bin"))

    if not model_files:
        print("⚠️ Warning: No model weight files found!")
        print("   Expected: *.safetensors, *.bin, or pytorch_model*.bin")

    # 2. Create config.json if missing
    config_path = Path(model_path) / "config.json"
    if not config_path.exists():
        print("📝 Creating minimal config.json...")
        config = {
            "_name_or_path": "abdelac/Mistral_Test",
            "architectures": ["MistralForCausalLM"],  # Adjust based on your model
            "model_type": "mistral",
            "torch_dtype": "float16",
            "transformers_version": "4.35.0"
        }
        with open(config_path, "w") as f:
            json.dump(config, f, indent=2)

    # 3. Create tokenizer files if missing
    tokenizer_config_path = Path(model_path) / "tokenizer_config.json"
    if not tokenizer_config_path.exists():
        print("📝 Creating tokenizer_config.json...")
        tokenizer_config = {
            "bos_token": "<s>",
            "eos_token": "</s>",
            "pad_token": "</s>",  # Mistral has no dedicated pad token; reuse EOS
            "unk_token": "<unk>",
            "model_max_length": 32768,
            "clean_up_tokenization_spaces": False
        }
        with open(tokenizer_config_path, "w") as f:
            json.dump(tokenizer_config, f, indent=2)

    # 4. Create special_tokens_map.json
    special_tokens_path = Path(model_path) / "special_tokens_map.json"
    if not special_tokens_path.exists():
        print("📝 Creating special_tokens_map.json...")
        special_tokens = {
            "bos_token": "<s>",
            "eos_token": "</s>",
            "pad_token": "</s>",
            "unk_token": "<unk>"
        }
        with open(special_tokens_path, "w") as f:
            json.dump(special_tokens, f, indent=2)

    # 5. Create README.md
    # FIX: in the original the readme_content triple-quoted string was never
    # terminated and never written to disk (the file ended mid-string, a
    # SyntaxError). The string is closed here and written out.
    readme_path = Path(model_path) / "README.md"
    if not readme_path.exists():
        print("📝 Creating README.md...")
        readme_content = """---
language:
- en
license: apache-2.0
tags:
- generated_from_trainer
- mistral
- text-generation
---

# Model Card

## Model Description

This model is a fine-tuned version of Mistral.

## Usage

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("abdelac/Mistral_Test")
tokenizer = AutoTokenizer.from_pretrained("abdelac/Mistral_Test")

prompt = "Explain machine learning"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
"""
        with open(readme_path, "w", encoding="utf-8") as f:
            f.write(readme_content)
push.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
from huggingface_hub import upload_folder

# Push the trained LoRA adapter directory (output of train.py) to the HF Hub.
upload_folder(
    folder_path="./lora-out",
    repo_id="abdelac/tinyllama-lora",
    repo_type="model"
)
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # requirements.txt
2
+
3
+
4
+ uvicorn[standard]==0.24.0
5
+ sentencepiece==0.1.99
6
+ bitsandbytes==0.41.1
7
+
8
+ torch
9
+ transformers
10
+ datasets
11
+ peft
12
+ accelerate
13
+ # bitsandbytes is already pinned above (bitsandbytes==0.41.1); duplicate unpinned entry removed
14
+ gradio
train.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model

MODEL_ID = "abdelac/tinyllama"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Llama-family tokenizers often ship without a pad token; padding="max_length"
# below would fail without one, so fall back to EOS (standard causal-LM practice).
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    load_in_4bit=True,
    device_map="auto"
)

# data.json: list of {"instruction": ..., "output": ...} records
dataset = load_dataset("json", data_files="data.json")["train"]

def tokenize(example):
    """Build the instruction-tuning prompt and tokenize it.

    FIX: adds `labels` (a copy of input_ids) so Trainer can compute the
    causal-LM loss — without labels the original raised at train time
    because the model returns no loss.
    """
    text = f"### Instruction:\n{example['instruction']}\n### Response:\n{example['output']}"
    tokens = tokenizer(text, truncation=True, padding="max_length", max_length=512)
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

dataset = dataset.map(tokenize)

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],  # attention projections only — small, standard LoRA target
    lora_dropout=0.05,
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)

training_args = TrainingArguments(
    output_dir="./lora-out",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,  # effective batch size of 4
    num_train_epochs=2,
    fp16=True,
    logging_steps=10,
    save_strategy="epoch"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset
)

trainer.train()
model.save_pretrained("./lora-out")  # saves adapter weights only, not the base model
upload.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
from huggingface_hub import upload_folder

# Push the full base model directory to the HF Hub.
upload_folder(
    folder_path="./tinyllama",  # local model directory
    repo_id="abdelac/tinyllama",
    repo_type="model"
)
upload_to_hf.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # upload_to_hf.py
2
+ import os
3
+ from huggingface_hub import HfApi, upload_folder, create_repo, login
4
+ from pathlib import Path
5
+ import shutil
6
+
7
def upload_model_folder(
    folder_path=".tinyllama",  # NOTE(review): likely meant "./tinyllama" — ".tinyllama" is a hidden dir name
    repo_id="abdelac/tinyllama",
    repo_type="model",
    token=None,
    private=False,
    commit_message="Upload model files"
):
    """
    Upload a folder to Hugging Face Hub

    Args:
        folder_path: Path to local folder to upload
        repo_id: Hugging Face repository ID (username/repo-name)
        repo_type: Type of repository ('model', 'dataset', 'space')
        token: Hugging Face token (optional, will prompt if not provided)
        private: Whether repository should be private
        commit_message: Commit message for the upload

    Returns:
        True on success, False on any failure. Errors are printed, not raised.
    """

    # Check if folder exists
    if not os.path.exists(folder_path):
        print(f"❌ Error: Folder '{folder_path}' does not exist!")
        return False

    # Login to Hugging Face (prompts interactively when token is None)
    try:
        login(token=token)
        print("✅ Logged in to Hugging Face")
    except Exception as e:
        print(f"❌ Login failed: {e}")
        return False

    # Check repository exists, create if not
    api = HfApi()
    try:
        # Try to get repo info
        repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type)
        print(f"✅ Repository exists: {repo_info.id}")
    except Exception:
        # Repository doesn't exist (or lookup failed), create it
        print(f"📦 Creating new repository: {repo_id}")
        try:
            api.create_repo(
                repo_id=repo_id,
                repo_type=repo_type,
                private=private,
                exist_ok=True  # tolerate a create/lookup race
            )
            print(f"✅ Repository created successfully!")
        except Exception as e:
            print(f"❌ Failed to create repository: {e}")
            return False

    # Upload the folder
    print(f"🚀 Uploading folder '{folder_path}' to '{repo_id}'...")
    try:
        # Method 1: Using upload_folder (recommended)
        upload_folder(
            folder_path=folder_path,
            repo_id=repo_id,
            repo_type=repo_type,
            commit_message=commit_message,
            commit_description=f"Upload model files from {folder_path}"
        )

        print(f"✅ Successfully uploaded to: https://huggingface.co/{repo_id}")
        return True

    except Exception as e:
        print(f"❌ Upload failed: {e}")

        # Fallback method using HfApi (same underlying endpoint, kept as a retry)
        try:
            print("🔄 Trying alternative upload method...")
            api.upload_folder(
                folder_path=folder_path,
                repo_id=repo_id,
                repo_type=repo_type,
                commit_message=commit_message
            )
            print(f"✅ Alternative method succeeded!")
            return True
        except Exception as e2:
            print(f"❌ Alternative method also failed: {e2}")
            return False
93
+
94
+ # Alternative: Upload specific files only
95
def upload_model_files(
    local_dir=".tinyllama",
    repo_id="abdelac/tinyllama",
    ignore_patterns=None
):
    """
    Upload specific model files with filtering

    Walks `local_dir` and pushes each file individually, skipping any whose
    name ends with one of `ignore_patterns`. Failures are printed per file.
    """
    from huggingface_hub import HfApi

    api = HfApi()

    # Upload each file individually (for more control)
    for root, dirs, files in os.walk(local_dir):
        for filename in files:
            # Honor suffix-based ignore patterns, if any were given
            if ignore_patterns and any(filename.endswith(p) for p in ignore_patterns):
                continue

            absolute_path = os.path.join(root, filename)
            # Get relative path for HF (path inside the repo)
            rel_path = os.path.relpath(absolute_path, local_dir)

            try:
                api.upload_file(
                    path_or_fileobj=absolute_path,
                    path_in_repo=rel_path,
                    repo_id=repo_id,
                    repo_type="model"
                )
                print(f"📤 Uploaded: {rel_path}")
            except Exception as e:
                print(f"❌ Failed to upload {rel_path}: {e}")
134
+
135
# Example usage
if __name__ == "__main__":
    # Example 1: Simple upload of a local model directory
    upload_model_folder(
        folder_path="./my_model",  # Your model folder
        repo_id="abdelac/tinyllama",
        repo_type="model",
        private=False,
        commit_message="Initial model upload"
    )

    # Example 2: Upload current directory
    # upload_model_folder(
    #     folder_path=".",
    #     repo_id="abdelac/Mistral_Test",
    #     repo_type="model"
    # )