Text Generation
PEFT
Safetensors
Transformers
qwen2
lora
coding
code-generation
conversational
text-generation-inference
Instructions to use girish00/ConicAI_LLM_model with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use girish00/ConicAI_LLM_model with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-Coder-0.5B-Instruct")
model = PeftModel.from_pretrained(base_model, "girish00/ConicAI_LLM_model")
```
- Transformers
How to use girish00/ConicAI_LLM_model with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="girish00/ConicAI_LLM_model")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("girish00/ConicAI_LLM_model")
model = AutoModelForCausalLM.from_pretrained("girish00/ConicAI_LLM_model")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use girish00/ConicAI_LLM_model with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "girish00/ConicAI_LLM_model"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "girish00/ConicAI_LLM_model",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/girish00/ConicAI_LLM_model
- SGLang
How to use girish00/ConicAI_LLM_model with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "girish00/ConicAI_LLM_model" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "girish00/ConicAI_LLM_model",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "girish00/ConicAI_LLM_model" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "girish00/ConicAI_LLM_model",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use girish00/ConicAI_LLM_model with Docker Model Runner:
docker model run hf.co/girish00/ConicAI_LLM_model
| """ | |
| Local LoRA fine-tuning script for a small coding model. | |
| Quick start (Windows/Linux local): | |
| 1) pip install transformers datasets peft accelerate bitsandbytes huggingface_hub | |
| 2) python finetune_coding_llm_colab.py --dataset-size 8000 | |
| 3) Optional upload: | |
| python finetune_coding_llm_colab.py --skip-train --upload --hf-repo your-user/your-model | |
| """ | |
| import argparse | |
| import json | |
| import os | |
| import random | |
| import torch | |
| from datasets import load_dataset | |
| from huggingface_hub import upload_folder | |
| from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training | |
| from transformers import ( | |
| AutoModelForCausalLM, | |
| AutoTokenizer, | |
| BitsAndBytesConfig, | |
| Trainer, | |
| TrainingArguments, | |
| ) | |
# Checkpoint loaded by default before the LoRA adapter is attached.
DEFAULT_MODEL_NAME = "Qwen/Qwen2.5-Coder-0.5B-Instruct"
# Default directory for training output (checkpoints, adapter, tokenizer).
DEFAULT_OUTPUT_DIR = "./model"
# Default path of the generated JSON training file.
DEFAULT_TRAIN_FILE = "train.json"
# Placeholder Hub repository; override with --hf-repo when uploading.
HF_REPO_ID = "your-username/coding-llm-model"

# Keep dataset size in the requested 5k-10k window.
DATASET_SIZE = 8000

# Seed examples the synthetic dataset is sampled from. Every entry carries the
# same four keys: "instruction", "input", "output" and "explanation".
TEMPLATES = [
    {
        "instruction": "Fix the Python code",
        "input": "def add(a,b) return a+b",
        "output": "def add(a, b): return a + b",
        "explanation": "Added missing colon and corrected syntax.",
    },
    {
        "instruction": "Fix loop syntax",
        "input": "for i in range(5 print(i)",
        "output": "for i in range(5): print(i)",
        "explanation": "Added missing parenthesis and colon.",
    },
    {
        "instruction": "Fix condition",
        "input": "if x = 10: print(x)",
        "output": "if x == 10: print(x)",
        "explanation": "Corrected assignment to comparison operator.",
    },
    {
        "instruction": "Explain code",
        "input": "for i in range(3): print(i)",
        "output": "Prints numbers from 0 to 2.",
        "explanation": "Loop iterates from 0 to 2 and prints values.",
    },
    {
        "instruction": "Write a Python function",
        "input": "Create a function to multiply two numbers",
        "output": "def multiply(a, b):\n return a * b",
        "explanation": "Defined a multiply function that returns the product of two inputs.",
    },
    {
        "instruction": "Write a Python function",
        "input": "Create a function to add two numbers",
        "output": "def add(a, b):\n return a + b",
        "explanation": "Defined an add function that returns the sum of two inputs.",
    },
    {
        "instruction": "Write a Python function",
        "input": "Create a function to subtract two numbers",
        "output": "def subtract(a, b):\n return a - b",
        "explanation": "Defined a subtract function that returns the difference between two inputs.",
    },
    {
        "instruction": "Write a Python function",
        "input": "Create a function to divide two numbers",
        "output": "def divide(a, b):\n return a / b",
        "explanation": "Defined a divide function that returns the quotient of two inputs.",
    },
]
def format_training_text(template):
    """Render one example as the prompt-plus-answer text used for training.

    Args:
        template: Mapping with the keys "instruction", "input", "output" and
            "explanation".

    Returns:
        A four-line string (each line newline-terminated): the instruction,
        the input, a fixed directive asking for JSON, and the expected answer
        serialized as a JSON object with the keys "code" and "explanation".

    Raises:
        KeyError: If one of the four required keys is missing.
    """
    target = {
        "code": template["output"],
        "explanation": template["explanation"],
    }
    # ensure_ascii=False keeps non-ASCII characters literal instead of
    # emitting \uXXXX escapes in the answer.
    return (
        f"Instruction: {template['instruction']}\n"
        f"Input: {template['input']}\n"
        "Return only valid JSON with keys code and explanation.\n"
        f"JSON: {json.dumps(target, ensure_ascii=False)}\n"
    )
def generate_sample():
    """Build one synthetic training record from a randomly chosen template.

    Returns:
        A dict holding the template's "instruction", "input", "output" and
        "explanation" values, the rendered training string under "text", and
        two random scores rounded to two decimals: "confidence" drawn from
        0.9 to 0.99 and "relevancy" drawn from 0.85 to 0.99.
    """
    template = random.choice(TEMPLATES)
    text = format_training_text(template)
    return {
        "instruction": template["instruction"],
        "input": template["input"],
        "output": template["output"],
        "explanation": template["explanation"],
        "text": text,
        "confidence": round(random.uniform(0.9, 0.99), 2),
        "relevancy": round(random.uniform(0.85, 0.99), 2),
    }
def build_dataset(train_file, size=DATASET_SIZE):
    """Generate ``size`` synthetic samples and write them to ``train_file``.

    Args:
        train_file: Destination path of the JSON file. Missing parent
            directories are created; an existing file is overwritten.
        size: Number of samples to generate.
    """
    dataset = [generate_sample() for _ in range(size)]

    # A --train-file inside a directory that does not exist yet would
    # otherwise fail in open().
    parent_dir = os.path.dirname(train_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(train_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=2)
    print(f"Dataset created: {len(dataset)} samples -> {train_file}")
def run_training(
    model_name,
    train_file,
    output_dir,
    epochs,
    batch_size,
    learning_rate,
    max_length,
    max_train_samples,
    use_4bit,
):
    """Fine-tune ``model_name`` with a LoRA adapter on a JSON training file.

    Args:
        model_name: Model id or path passed to ``from_pretrained``.
        train_file: JSON training file. Rows without a non-empty "text" field
            are rendered with ``format_training_text``.
        output_dir: Directory for Trainer checkpoints and for the final
            adapter and tokenizer.
        epochs: Value passed as ``num_train_epochs``.
        batch_size: Per-device training batch size.
        learning_rate: Learning rate passed to the Trainer.
        max_length: Every example is truncated or padded to this many tokens.
        max_train_samples: If greater than 0, only the first
            ``max_train_samples`` rows are used.
        use_4bit: Request 4-bit loading; ignored with a warning when CUDA is
            not available.

    Raises:
        FileNotFoundError: If ``train_file`` does not exist.
    """
    if not os.path.exists(train_file):
        raise FileNotFoundError(
            f"Training file not found: {train_file}. "
            "Run this script without --skip-dataset-gen to generate it first."
        )

    dataset = load_dataset("json", data_files=train_file)

    # Trim before tokenizing so that rows which are never trained on are not
    # tokenized at all. The same leading rows are kept either way.
    if max_train_samples > 0:
        keep = min(max_train_samples, len(dataset["train"]))
        dataset["train"] = dataset["train"].select(range(keep))

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Only fall back to EOS when the tokenizer ships without a pad token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    eos_token = tokenizer.eos_token

    def format_data(example):
        text = example.get("text")
        if not text:
            text = format_training_text(example)
        # Padding is excluded from the loss below, so the end of the answer
        # must be marked explicitly for the model to learn where to stop.
        if eos_token and not text.endswith(eos_token):
            text = text + eos_token
        tokens = tokenizer(
            text,
            truncation=True,
            padding="max_length",
            max_length=max_length,
        )
        # -100 is the label value the causal-LM loss ignores. Without this
        # mask, padding up to max_length would dominate the loss.
        tokens["labels"] = [
            token_id if attended else -100
            for token_id, attended in zip(
                tokens["input_ids"], tokens["attention_mask"]
            )
        ]
        return tokens

    tokenized = dataset.map(
        format_data,
        remove_columns=dataset["train"].column_names,
        desc="Tokenizing training dataset",
    )

    cuda_available = torch.cuda.is_available()
    fp16_enabled = cuda_available
    quantize_4bit = use_4bit and cuda_available
    if use_4bit and not cuda_available:
        print("Warning: --use-4bit requested but CUDA not available. Falling back to standard loading.")

    if quantize_4bit:
        bnb_config = BitsAndBytesConfig(load_in_4bit=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=bnb_config,
            device_map="auto",
        )
        model = prepare_model_for_kbit_training(model)
    else:
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto" if cuda_available else None,
        )

    # LoRA adapters are attached to the q_proj and v_proj modules only.
    lora_config = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )
    model = get_peft_model(model, lora_config)

    training_args = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=batch_size,
        num_train_epochs=epochs,
        gradient_accumulation_steps=2,
        logging_steps=10,
        save_steps=100,
        learning_rate=learning_rate,
        fp16=fp16_enabled,
        dataloader_pin_memory=cuda_available,
        report_to="none",
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized["train"],
    )
    trainer.train()

    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    print(f"Model and tokenizer saved to: {output_dir}")
def upload_to_hf(repo_id, output_dir):
    """Upload the contents of ``output_dir`` to a Hugging Face model repo.

    Args:
        repo_id: Target repository, e.g. "user/model-name".
        output_dir: Local directory holding the saved adapter and tokenizer.

    Raises:
        FileNotFoundError: If ``output_dir`` is missing or is not a directory.
    """
    # isdir rather than exists: a regular file at this path cannot be
    # uploaded as a folder and should produce the same clear error.
    if not os.path.isdir(output_dir):
        raise FileNotFoundError(
            f"Model output folder not found: {output_dir}. Run training before upload."
        )
    upload_folder(
        folder_path=output_dir,
        repo_id=repo_id,
        repo_type="model",
        # Skip the intermediate checkpoint-<step> directories that training
        # writes into the same folder; only the final artifacts are published.
        ignore_patterns=["checkpoint-*/*"],
    )
    print(f"Uploaded to Hugging Face repo: {repo_id}")
def main(argv=None):
    """Command-line entry point: generate the dataset, train, then upload.

    Each stage can be switched independently: ``--skip-dataset-gen`` and
    ``--skip-train`` disable the first two, ``--upload`` enables the last.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset-size", type=int, default=DATASET_SIZE)
    parser.add_argument("--train-file", type=str, default=DEFAULT_TRAIN_FILE)
    parser.add_argument("--output-dir", type=str, default=DEFAULT_OUTPUT_DIR)
    parser.add_argument("--model-name", type=str, default=DEFAULT_MODEL_NAME)
    parser.add_argument("--epochs", type=float, default=1)
    parser.add_argument("--batch-size", type=int, default=2)
    parser.add_argument("--learning-rate", type=float, default=2e-4)
    parser.add_argument("--max-length", type=int, default=512)
    parser.add_argument("--max-train-samples", type=int, default=0)
    parser.add_argument("--use-4bit", action="store_true")
    parser.add_argument("--skip-dataset-gen", action="store_true")
    parser.add_argument("--skip-train", action="store_true")
    parser.add_argument("--upload", action="store_true")
    parser.add_argument("--hf-repo", type=str, default=HF_REPO_ID)
    args = parser.parse_args(argv)

    if not args.skip_dataset_gen:
        # The size only matters when a dataset is actually generated.
        # parser.error prints the usage text and exits with status 2 instead
        # of dumping a traceback.
        if not (5000 <= args.dataset_size <= 10000):
            parser.error("dataset-size must be between 5000 and 10000")
        build_dataset(train_file=args.train_file, size=args.dataset_size)

    if not args.skip_train:
        run_training(
            model_name=args.model_name,
            train_file=args.train_file,
            output_dir=args.output_dir,
            epochs=args.epochs,
            batch_size=args.batch_size,
            learning_rate=args.learning_rate,
            max_length=args.max_length,
            max_train_samples=args.max_train_samples,
            use_4bit=args.use_4bit,
        )

    if args.upload:
        upload_to_hf(repo_id=args.hf_repo, output_dir=args.output_dir)


if __name__ == "__main__":
    main()