Spaces:
Build error
Build error
Add application file
Browse files- api_fastapi.py +82 -0
- app.py +51 -0
- app_gradio.py +86 -0
- basic_inference.py +20 -0
- data.json +10 -0
- deepseek_dockerfile_20251226_58f521.dockerfile +17 -0
- dockerfile +17 -0
- dockerfile.dockerfile +9 -0
- inference.py +70 -0
- optimization.py +30 -0
- optimized_loading.py +20 -0
- prepare_model.py +97 -0
- push.py +7 -0
- requirements.txt +14 -0
- train.py +50 -0
- upload.py +7 -0
- upload_to_hf.py +151 -0
api_fastapi.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# api_fastapi.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import uvicorn

app = FastAPI(title="Mistral API")

class ChatRequest(BaseModel):
    """Payload for /chat: a prompt plus generation settings."""
    prompt: str
    max_tokens: int = 500
    temperature: float = 0.7

# Global model instance, populated once at startup.
MODEL = None
TOKENIZER = None

@app.on_event("startup")
async def load_model():
    """Load the tokenizer and model once when the server starts.

    Failures are printed rather than raised so the API can still serve
    /health and report model_loaded=False.
    """
    global MODEL, TOKENIZER
    try:
        TOKENIZER = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
        MODEL = AutoModelForCausalLM.from_pretrained(
            "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            torch_dtype=torch.float16,
            device_map="auto",
            load_in_8bit=True
        )
        print("Model loaded successfully!")
    except Exception as e:
        print(f"Error loading model: {e}")

@app.get("/health")
def health():
    """Liveness probe; also reports whether the model finished loading."""
    return {"status": "healthy", "model_loaded": MODEL is not None}

@app.post("/chat")
def chat_completion(request: ChatRequest):
    """Generate a single chat completion.

    Declared as a plain `def` (not `async def`): `model.generate` is a long
    blocking call, and FastAPI runs sync endpoints in a worker thread, so
    the event loop is not frozen for other clients while generating.
    """
    if MODEL is None or TOKENIZER is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    try:
        # TinyLlama-Chat follows the Llama [INST] instruction format.
        formatted_prompt = f"[INST] {request.prompt} [/INST]"

        # Tokenize on the model's actual device (device_map may shard it).
        inputs = TOKENIZER(formatted_prompt, return_tensors="pt").to(MODEL.device)

        # Generate without building an autograd graph.
        with torch.no_grad():
            outputs = MODEL.generate(
                **inputs,
                max_new_tokens=request.max_tokens,
                temperature=request.temperature,
                do_sample=True,
                top_p=0.95
            )

        # Decode and keep only the text after the last [/INST] marker.
        response = TOKENIZER.decode(outputs[0], skip_special_tokens=True)
        response = response.split("[/INST]")[-1].strip()

        return {
            "response": response,
            "tokens_generated": len(outputs[0]) - len(inputs.input_ids[0])
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/batch_chat")
def batch_chat(requests: list[ChatRequest]):
    """Process multiple prompts sequentially and return all responses."""
    responses = [chat_completion(req) for req in requests]
    return {"responses": responses}

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
app.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

BASE_MODEL = "abdelac/tinyllama"
LORA_MODEL = "abdelac/tinyllama-lora"

# Tokenizer comes from the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# Base model in 4-bit so it fits in modest RAM/VRAM.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    load_in_4bit=True,
    device_map="auto",
    torch_dtype=torch.float16
)

# Attach the fine-tuned LoRA adapters and freeze into eval mode.
model = PeftModel.from_pretrained(base_model, LORA_MODEL)
model.eval()

def chat(prompt, max_tokens=200, temperature=0.7):
    """Run one sampled completion and return the decoded text."""
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=0.9
        )

    return tokenizer.decode(generated[0], skip_special_tokens=True)

# Gradio UI
demo = gr.Interface(
    fn=chat,
    inputs=[
        gr.Textbox(lines=4, label="Prompt"),
        gr.Slider(50, 500, value=200, label="Max tokens"),
        gr.Slider(0.1, 1.5, value=0.7, label="Temperature"),
    ],
    outputs="text",
    title="TinyLlama Fine-Tuned (LoRA)",
    description="TinyLlama loaded with LoRA adapters for domain-specific inference",
)

demo.launch()
|
app_gradio.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app_gradio.py
|
| 2 |
+
import gradio as gr
|
| 3 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
class MistralApp:
    """Lazy-loading chat wrapper around TinyLlama-1.1B-Chat for the Gradio UI."""

    def __init__(self):
        # Device string kept for reference; generation follows model.device.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = None
        self.tokenizer = None

    def load_model(self):
        """Load model and tokenizer on first call; later calls are no-ops."""
        if self.model is None:
            print("Loading model...")
            self.tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
            self.model = AutoModelForCausalLM.from_pretrained(
                "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                torch_dtype=torch.float16,
                device_map="auto",
                load_in_8bit=True  # Reduce memory usage
            )
            print("Model loaded!")
        return "Model loaded successfully!"

    def respond(self, message, history):
        """Generate the assistant reply for `message` given chat `history`.

        `history` is a list of (user, assistant) pairs as produced by the
        Gradio chatbot wiring below.
        """
        # Guard: previously this crashed with AttributeError if the user
        # sent a message before clicking "Load Model".
        if self.model is None or self.tokenizer is None:
            return "Model is not loaded yet - click 'Load Model' first."

        formatted_prompt = self.format_chat(message, history)

        # device_map="auto" may place weights elsewhere than self.device;
        # always follow the model's actual device (the old .to(self.device)
        # broke on dispatched models).
        inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.model.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=512,
                temperature=0.7,
                do_sample=True
            )

        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Keep only the text after the final [/INST] marker.
        return response.split("[/INST]")[-1].strip()

    def format_chat(self, message, history):
        """Flatten history plus the new message into the Llama [INST] format."""
        prompt = ""
        for user_msg, assistant_msg in history:
            prompt += f"[INST] {user_msg} [/INST] {assistant_msg} "
        prompt += f"[INST] {message} [/INST]"
        return prompt
|
| 48 |
+
|
| 49 |
+
# Create Gradio interface
app = MistralApp()

# NOTE: the UI previously advertised "Mistral 7B" (with a mangled emoji),
# but the loaded model is TinyLlama-1.1B-Chat — label it accurately.
with gr.Blocks(title="TinyLlama Chat Assistant") as demo:
    gr.Markdown("# 🤖 TinyLlama Chat Assistant")

    with gr.Row():
        with gr.Column(scale=1):
            load_btn = gr.Button("Load Model", variant="primary")
            status = gr.Textbox(label="Status", interactive=False)

        with gr.Column(scale=4):
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(label="Your message", placeholder="Type your message here...")
            send_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear")

    # Connect events
    load_btn.click(app.load_model, outputs=status)

    def user(user_message, history):
        """Append the user's message to history and clear the input box."""
        return "", history + [[user_message, None]]

    def bot(history):
        """Fill in the assistant reply for the newest history entry."""
        response = app.respond(history[-1][0], history[:-1])
        history[-1][1] = response
        return history

    # Enter key and Send button share the same two-step pipeline:
    # enqueue the user's turn, then stream in the bot's reply.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    send_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear_btn.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
|
basic_inference.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# basic_inference.py
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load from local directory or Hugging Face
model_path = "./tinyllama"  # or "mistralai/Mistral-7B-Instruct-v0.1"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map="auto" if device == "cuda" else None,
)

# Move to device only when device_map was NOT used: with device_map="auto"
# accelerate has already placed the weights and calling .to() on a
# dispatched model is invalid. (The original condition was inverted — it
# moved the model exactly in the device_map case, contradicting its own
# comment.)
if device != "cuda":
    model = model.to(device)
|
data.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"instruction": "Explain fake news",
|
| 4 |
+
"output": "Fake news is false or misleading information presented as news."
|
| 5 |
+
},
|
| 6 |
+
{
|
| 7 |
+
"instruction": "What is AI?",
|
| 8 |
+
"output": "Artificial Intelligence is the simulation of human intelligence by machines."
|
| 9 |
+
}
|
| 10 |
+
]
|
deepseek_dockerfile_20251226_58f521.dockerfile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dockerfile
# Minimal CPU image serving the FastAPI inference API on port 8000.
FROM python:3.9-slim

WORKDIR /app

# Install dependencies first so Docker layer caching skips the reinstall
# when only application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application
COPY . .

# Expose port (matches uvicorn.run(..., port=8000) in api_fastapi.py)
EXPOSE 8000

# Run the application
CMD ["python", "api_fastapi.py"]
|
dockerfile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dockerfile
# NOTE(review): byte-identical to deepseek_dockerfile_20251226_58f521.dockerfile —
# one of the two copies is likely redundant.
FROM python:3.9-slim

WORKDIR /app

# Install dependencies before copying the app so the pip layer is cached.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application
COPY . .

# Expose port (the FastAPI server binds 0.0.0.0:8000)
EXPOSE 8000

# Run the application
CMD ["python", "api_fastapi.py"]
|
dockerfile.dockerfile
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# requirements.txt  -- NOTE(review): this file is named dockerfile.dockerfile but contains pip requirements; rename it or merge into requirements.txt
|
| 2 |
+
torch==2.1.0
|
| 3 |
+
transformers==4.35.0
|
| 4 |
+
accelerate==0.24.1
|
| 5 |
+
fastapi==0.104.1
|
| 6 |
+
uvicorn[standard]==0.24.0
|
| 7 |
+
gradio==4.8.0
|
| 8 |
+
sentencepiece==0.1.99
|
| 9 |
+
bitsandbytes==0.41.1
|
inference.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# inference.py
|
| 2 |
+
import torch
|
| 3 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
|
| 4 |
+
|
| 5 |
+
class MistralChat:
    """Minimal chat wrapper: loads a causal LM and generates replies in the
    Llama [INST] instruction format."""

    def __init__(self, model_path="TinyLlama/TinyLlama-1.1B-Chat-v1.0"):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        print("Loading model...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
            device_map="auto" if self.device == "cuda" else None,
            trust_remote_code=True
        )
        # NOTE: with device_map="auto", accelerate has already dispatched
        # the weights; the old explicit `self.model.to(self.device)` was
        # redundant and can raise on dispatched models, so it was removed.

        print("Model loaded successfully!")

    def generate(self, prompt, max_length=500, temperature=0.7):
        """Return the model's reply to `prompt`.

        Args:
            prompt: user instruction (wrapped in [INST]...[/INST]).
            max_length: max NEW tokens to sample.
            temperature: sampling temperature.
        """
        # Format for instruct models
        formatted_prompt = f"[INST] {prompt} [/INST]"

        inputs = self.tokenizer(formatted_prompt, return_tensors="pt")
        if self.device == "cuda":
            inputs = inputs.to(self.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_length,
                temperature=temperature,
                do_sample=True,
                top_p=0.95,
                pad_token_id=self.tokenizer.eos_token_id
            )

        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Extract only the assistant's response
        if "[/INST]" in response:
            response = response.split("[/INST]")[1].strip()

        return response

    def chat_stream(self, prompt):
        """Stream the response token by token to stdout."""
        formatted_prompt = f"[INST] {prompt} [/INST]"
        inputs = self.tokenizer(formatted_prompt, return_tensors="pt")

        streamer = TextStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)

        if self.device == "cuda":
            inputs = inputs.to(self.device)

        # no_grad here too: generation never needs an autograd graph.
        with torch.no_grad():
            _ = self.model.generate(**inputs, streamer=streamer, max_new_tokens=500)
|
| 59 |
+
|
| 60 |
+
# Usage
if __name__ == "__main__":
    # Demo driver: one buffered completion, then a streamed one.
    assistant = MistralChat()

    # Single response
    answer = assistant.generate("Explain quantum computing in simple terms")
    print("Response:", answer)

    # Streaming response
    print("\nStreaming response:")
    assistant.chat_stream("Write a short poem about AI")
|
optimization.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# optimization.py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# 1. Use pipeline for simplicity
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    model_kwargs={
        "torch_dtype": torch.float16,
        "device_map": "auto",
        "load_in_4bit": True
    },
    tokenizer="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
)

# 2. Use vLLM for high-throughput (install: pip install vLLM)
from vllm import LLM, SamplingParams

# Fixed model id: the original read "mTinyLlama/TinyLlama-1.1B-Chat-v1.0"
# (stray leading "m"), which is not a valid Hub repo and fails to resolve.
llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
sampling_params = SamplingParams(temperature=0.7, max_tokens=500)
outputs = llm.generate(["Hello, how are you?"], sampling_params)

# 3. Cache model responses
import hashlib
from functools import lru_cache

@lru_cache(maxsize=1000)
def cached_generation(prompt, max_tokens=500):
    """Memoized generation: identical (prompt, max_tokens) pairs hit the cache."""
    return pipe(prompt, max_new_tokens=max_tokens)[0]['generated_text']
|
optimized_loading.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# optimized_loading.py
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from accelerate import infer_auto_device_map

# NOTE(review): this file demonstrates two ALTERNATIVE loading strategies;
# the second assignment replaces the first, so keep only one in practice.

# 4-bit quantization (reduces memory by 75%)
model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    load_in_4bit=True,  # or load_in_8bit=True for 8-bit
    device_map="auto",
    torch_dtype=torch.float16,
)

# CPU offloading (for low RAM)
model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    device_map="auto",
    offload_folder="offload",  # weights spilled to disk go here
    offload_state_dict=True,
)
|
prepare_model.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# prepare_model.py
import os
import json
import shutil
from pathlib import Path

def create_minimal_model_structure(model_path="."):
    """
    Create minimal required files for Hugging Face model upload.

    Ensures config.json, tokenizer_config.json, special_tokens_map.json and
    README.md exist under `model_path`, writing minimal defaults for any that
    are missing. Existing files are never overwritten; missing weight files
    only produce a warning.
    """

    # Create directories if they don't exist
    os.makedirs(model_path, exist_ok=True)

    # 1. Check for model weight files (warn only — config-only repos upload fine)
    model_files = list(Path(model_path).glob("*.safetensors")) + \
                  list(Path(model_path).glob("*.bin")) + \
                  list(Path(model_path).glob("pytorch_model*.bin"))

    if not model_files:
        print("⚠️ Warning: No model weight files found!")
        print("   Expected: *.safetensors, *.bin, or pytorch_model*.bin")

    # 2. Create config.json if missing
    config_path = Path(model_path) / "config.json"
    if not config_path.exists():
        print("📝 Creating minimal config.json...")
        config = {
            "_name_or_path": "abdelac/Mistral_Test",
            "architectures": ["MistralForCausalLM"],  # Adjust based on your model
            "model_type": "mistral",
            "torch_dtype": "float16",
            "transformers_version": "4.35.0"
        }
        with open(config_path, "w") as f:
            json.dump(config, f, indent=2)

    # 3. Create tokenizer files if missing
    tokenizer_config_path = Path(model_path) / "tokenizer_config.json"
    if not tokenizer_config_path.exists():
        print("📝 Creating tokenizer_config.json...")
        tokenizer_config = {
            "bos_token": "<s>",
            "eos_token": "</s>",
            "pad_token": "</s>",
            "unk_token": "<unk>",
            "model_max_length": 32768,
            "clean_up_tokenization_spaces": False
        }
        with open(tokenizer_config_path, "w") as f:
            json.dump(tokenizer_config, f, indent=2)

    # 4. Create special_tokens_map.json
    special_tokens_path = Path(model_path) / "special_tokens_map.json"
    if not special_tokens_path.exists():
        print("📝 Creating special_tokens_map.json...")
        special_tokens = {
            "bos_token": "<s>",
            "eos_token": "</s>",
            "pad_token": "</s>",
            "unk_token": "<unk>"
        }
        with open(special_tokens_path, "w") as f:
            json.dump(special_tokens, f, indent=2)

    # 5. Create README.md
    # FIX: the original file was truncated inside this triple-quoted string
    # (unterminated literal -> SyntaxError, and the README was never written).
    # The string is now closed and written to disk.
    readme_path = Path(model_path) / "README.md"
    if not readme_path.exists():
        print("📝 Creating README.md...")
        readme_content = """---
language:
- en
license: apache-2.0
tags:
- generated_from_trainer
- mistral
- text-generation
---

# Model Card

## Model Description

This model is a fine-tuned version of Mistral.

## Usage

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("abdelac/Mistral_Test")
tokenizer = AutoTokenizer.from_pretrained("abdelac/Mistral_Test")

prompt = "Explain machine learning"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
"""
        with open(readme_path, "w") as f:
            f.write(readme_content)
|
push.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from huggingface_hub import upload_folder

# Push the trained LoRA adapter weights (written by train.py into
# ./lora-out) to the Hugging Face Hub adapter repo.
upload_folder(
    folder_path="./lora-out",
    repo_id="abdelac/tinyllama-lora",
    repo_type="model"
)
|
requirements.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# requirements.txt
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
uvicorn[standard]==0.24.0
|
| 5 |
+
sentencepiece==0.1.99
|
| 6 |
+
bitsandbytes==0.41.1
|
| 7 |
+
|
| 8 |
+
torch
|
| 9 |
+
transformers
|
| 10 |
+
datasets
|
| 11 |
+
peft
|
| 12 |
+
accelerate
|
| 13 |
+
# bitsandbytes already pinned above (bitsandbytes==0.41.1) -- duplicate entry removed
|
| 14 |
+
gradio
|
train.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model

MODEL_ID = "abdelac/tinyllama"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Llama-family tokenizers ship without a pad token; tokenizing with
# padding="max_length" below raises unless one is set.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    load_in_4bit=True,
    device_map="auto"
)

dataset = load_dataset("json", data_files="data.json")["train"]

def tokenize(example):
    """Tokenize one instruction/response pair and attach causal-LM labels."""
    text = f"### Instruction:\n{example['instruction']}\n### Response:\n{example['output']}"
    tokens = tokenizer(text, truncation=True, padding="max_length", max_length=512)
    # Labels mirror input_ids for causal-LM fine-tuning; without them the
    # Trainer cannot compute a loss and training fails at the first step.
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

# Drop the raw string columns so the Trainer only sees tensorizable fields.
dataset = dataset.map(tokenize, remove_columns=dataset.column_names)

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)

training_args = TrainingArguments(
    output_dir="./lora-out",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=2,
    fp16=True,
    logging_steps=10,
    save_strategy="epoch"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset
)

trainer.train()
# Save only the LoRA adapter weights (uploaded later by push.py).
model.save_pretrained("./lora-out")
|
upload.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from huggingface_hub import upload_folder

# One-shot push of the local base-model snapshot to the Hub.
upload_folder(
    folder_path="./tinyllama",  # local model directory
    repo_id="abdelac/tinyllama",
    repo_type="model"
)
|
upload_to_hf.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# upload_to_hf.py
|
| 2 |
+
import os
|
| 3 |
+
from huggingface_hub import HfApi, upload_folder, create_repo, login
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
import shutil
|
| 6 |
+
|
| 7 |
+
def upload_model_folder(
    folder_path="./tinyllama",
    repo_id="abdelac/tinyllama",
    repo_type="model",
    token=None,
    private=False,
    commit_message="Upload model files"
):
    """
    Upload a folder to Hugging Face Hub.

    Args:
        folder_path: Path to local folder to upload
        repo_id: Hugging Face repository ID (username/repo-name)
        repo_type: Type of repository ('model', 'dataset', 'space')
        token: Hugging Face token (optional, will prompt if not provided)
        private: Whether repository should be private
        commit_message: Commit message for the upload

    Returns:
        True on success, False on any failure (errors are printed, not raised).
    """

    # Check if folder exists.
    # FIX: default was ".tinyllama" (a hidden-file name) — corrected to
    # "./tinyllama" to match upload.py and push.py.
    if not os.path.exists(folder_path):
        print(f"❌ Error: Folder '{folder_path}' does not exist!")
        return False

    # Login to Hugging Face (prompts interactively when token is None)
    try:
        login(token=token)
        print("✅ Logged in to Hugging Face")
    except Exception as e:
        print(f"❌ Login failed: {e}")
        return False

    # Check repository exists, create if not
    api = HfApi()
    try:
        # Try to get repo info
        repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type)
        print(f"✅ Repository exists: {repo_info.id}")
    except Exception:
        # Repository doesn't exist, create it
        print(f"📦 Creating new repository: {repo_id}")
        try:
            api.create_repo(
                repo_id=repo_id,
                repo_type=repo_type,
                private=private,
                exist_ok=True
            )
            print("✅ Repository created successfully!")
        except Exception as e:
            print(f"❌ Failed to create repository: {e}")
            return False

    # Upload the folder
    print(f"🚀 Uploading folder '{folder_path}' to '{repo_id}'...")
    try:
        # Method 1: Using upload_folder (recommended)
        upload_folder(
            folder_path=folder_path,
            repo_id=repo_id,
            repo_type=repo_type,
            commit_message=commit_message,
            commit_description=f"Upload model files from {folder_path}"
        )

        print(f"✅ Successfully uploaded to: https://huggingface.co/{repo_id}")
        return True

    except Exception as e:
        print(f"❌ Upload failed: {e}")

        # Fallback method using HfApi
        try:
            print("🔄 Trying alternative upload method...")
            api.upload_folder(
                folder_path=folder_path,
                repo_id=repo_id,
                repo_type=repo_type,
                commit_message=commit_message
            )
            print("✅ Alternative method succeeded!")
            return True
        except Exception as e2:
            print(f"❌ Alternative method also failed: {e2}")
            return False
|
| 93 |
+
|
| 94 |
+
# Alternative: Upload specific files only
|
| 95 |
+
def upload_model_files(
|
| 96 |
+
local_dir=".tinyllama",
|
| 97 |
+
repo_id="abdelac/tinyllama",
|
| 98 |
+
ignore_patterns=None
|
| 99 |
+
):
|
| 100 |
+
"""
|
| 101 |
+
Upload specific model files with filtering
|
| 102 |
+
"""
|
| 103 |
+
from huggingface_hub import HfApi
|
| 104 |
+
|
| 105 |
+
api = HfApi()
|
| 106 |
+
|
| 107 |
+
# Upload each file individually (for more control)
|
| 108 |
+
for root, dirs, files in os.walk(local_dir):
|
| 109 |
+
for file in files:
|
| 110 |
+
# Skip files matching ignore patterns
|
| 111 |
+
if ignore_patterns:
|
| 112 |
+
skip = False
|
| 113 |
+
for pattern in ignore_patterns:
|
| 114 |
+
if file.endswith(pattern):
|
| 115 |
+
skip = True
|
| 116 |
+
break
|
| 117 |
+
if skip:
|
| 118 |
+
continue
|
| 119 |
+
|
| 120 |
+
file_path = os.path.join(root, file)
|
| 121 |
+
# Get relative path for HF
|
| 122 |
+
rel_path = os.path.relpath(file_path, local_dir)
|
| 123 |
+
|
| 124 |
+
try:
|
| 125 |
+
api.upload_file(
|
| 126 |
+
path_or_fileobj=file_path,
|
| 127 |
+
path_in_repo=rel_path,
|
| 128 |
+
repo_id=repo_id,
|
| 129 |
+
repo_type="model"
|
| 130 |
+
)
|
| 131 |
+
print(f"π€ Uploaded: {rel_path}")
|
| 132 |
+
except Exception as e:
|
| 133 |
+
print(f"β Failed to upload {rel_path}: {e}")
|
| 134 |
+
|
| 135 |
+
# Example usage
if __name__ == "__main__":
    # Example 1: Simple upload of a local folder to the Hub.
    upload_model_folder(
        folder_path="./my_model",  # Your model folder
        repo_id="abdelac/tinyllama",
        repo_type="model",
        private=False,
        commit_message="Initial model upload"
    )

    # Example 2: Upload current directory
    # upload_model_folder(
    #     folder_path=".",
    #     repo_id="abdelac/Mistral_Test",
    #     repo_type="model"
    # )
|