import os
from getpass import getpass

from huggingface_hub import HfApi, HfFolder, login

# --- Configuration ---
REPO_ID = "jinv2/safesky-ai-gemma-2b-sft"  # Your repository ID
BASE_MODEL_ID = "google/gemma-2b"          # Base model used
ATTRIBUTION = "天算AI (Natural Algorithm)"  # Attribution / producer info

# --- Model Card (README.md) content template ---

def generate_readme_content(repo_id, base_model_id, attribution):
    """Build the full README.md (model card) text for the Hub repository.

    Args:
        repo_id: Hub repository ID the card describes (e.g. "user/repo").
        base_model_id: Base model the LoRA adapters apply to.
        attribution: Producer / attribution string shown on the card.

    Returns:
        str: YAML metadata front matter followed by the Markdown body,
        joined with a blank line.
    """
    # YAML front matter: Hub metadata and tags. The leading/trailing "---"
    # delimiters are required for the Hub to parse the metadata.
    yaml_header = f"""---
license: other # 或者选择一个明确的许可证,需与基础模型兼容
language: en
tags:
- lora
- peft
- gemma
- safesky-ai
- ai-safety
- sft
- hh-rlhf
- text-generation
- transformers
base_model: {base_model_id}
---
"""

    # Main Markdown content. Doubled braces ({{...}}) inside the embedded
    # code example are f-string escapes: they render as single braces in
    # the generated README.
    markdown_content = f"""
# SafeSky AI - Fine-tuned Gemma-2b for AI Safety (LoRA Adapter)

**出品人 (Attribution):** {attribution}

This repository contains LoRA adapters for the {base_model_id} model, fine-tuned for enhanced AI safety as part of the SafeSky AI initiative by {attribution}.

## Model Description

This model is a fine-tuned version of Google's Gemma-2b using Supervised Fine-tuning (SFT) with LoRA adapters (Parameter-Efficient Fine-Tuning - PEFT). The goal of this fine-tuning is to improve the model's alignment with safety instructions and reduce the likelihood of generating harmful, unethical, or problematic content.

The adapters were extracted from checkpoint-100 of the fine-tuning process.

## Training Data

The model was fine-tuned on the "chosen" responses from the Anthropic/hh-rlhf (Helpful and Harmless Reinforcement Learning from Human Feedback) dataset. This dataset contains human preferences over model responses, focusing on helpfulness and harmlessness.

## How to Use

To use these LoRA adapters, you need to load the base model ({base_model_id}) and then apply the adapters from this repository ({repo_id}).

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Base model ID
base_model_id = "{base_model_id}"
# Your adapter repository ID
adapter_repo_id = "{repo_id}"

# --- Load Base Model and Tokenizer ---
print(f"Loading base model: {{base_model_id}}")
# Load the base model (choose quantization or full precision based on your hardware)
# Example using bfloat16 (requires capable GPU)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.bfloat16, # Or torch.float16 or use BitsAndBytesConfig for 4-bit
    device_map="auto"
)

print(f"Loading tokenizer: {{base_model_id}}")
# It's usually sufficient to use the base model's tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# --- Load LoRA Adapters ---
print(f"Loading LoRA adapter: {{adapter_repo_id}}")
# Load the PeftModel by merging the adapter from your repo onto the base model
model = PeftModel.from_pretrained(base_model, adapter_repo_id)
print("LoRA adapter loaded and applied successfully!")

# --- Optional: Merge for faster inference (requires more memory) ---
# print("Attempting to merge model...")
# model = model.merge_and_unload()
# print("Model merged.")

# --- Inference Example ---
prompt = "Human: Please explain the concept of AI safety. Assistant:"
# Format the input using the chat template appropriate for Gemma
inputs = tokenizer(f"<start_of_turn>user\\n{{prompt}}<end_of_turn>\\n<start_of_turn>model\\n", return_tensors="pt").to(model.device)

print("Generating response...")
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=150, do_sample=True, temperature=0.7, pad_token_id=tokenizer.eos_token_id) # Ensure pad_token_id is set

print("\\nResponse:")
# Decode skipping special tokens, but be mindful that the stop sequence might be part of the output
# Depending on the fine-tuning data, you might need more sophisticated stopping logic
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

## Intended Use

This model adapter is intended for research purposes in AI safety, for developing safer conversational AI prototypes, and for educational purposes related to fine-tuning and model alignment.

## Limitations and Bias

- **Safety is Not Guaranteed:** While fine-tuned for safety, this model may still generate unsafe, biased, or otherwise problematic content. It should not be deployed in critical applications without rigorous testing and safety guardrails.
- **Inherited Bias:** The model inherits biases present in the base {base_model_id} model and the Anthropic/hh-rlhf dataset.
- **Performance:** Fine-tuning might affect the model's performance on tasks unrelated to safety. The focus was on improving harmlessness based on the HH-RLHF data.

## License

The use of this adapter is subject to the terms of the original Gemma model license. Please refer to the Gemma Terms of Use provided by Google. This adapter itself does not impose additional license restrictions beyond those of the base model and the training data.
"""
    return yaml_header.strip() + "\n\n" + markdown_content.strip()

# --- Main Script Logic ---
if __name__ == "__main__":
    print(f"--- Preparing to update README.md for repo: {REPO_ID} ---")

    # --- 1. Authenticate ---
    token = HfFolder.get_token()  # Try to get token saved by 'huggingface-cli login'
    if token is None:
        print("Hugging Face token not found locally.")
        # Fallback to asking for token (less secure for scripts)
        # token = getpass("Please enter your Hugging Face Access Token (with write permission): ")
        # Alternatively, instruct user to run 'huggingface-cli login' first
        print("Please run 'huggingface-cli login' in your terminal first.")
        exit(1)  # Exit if no token available

    try:
        api = HfApi(token=token)
        user = api.whoami()
        print(f"Authenticated as: {user['name']}")
        # Uploading still works with write access to someone else's repo,
        # so a mismatch is only a warning, not an error.
        if user['name'] != REPO_ID.split('/')[0]:
            print(f"Warning: Logged in user ({user['name']}) does not match repo owner ({REPO_ID.split('/')[0]}). Make sure you have write access.")
    except Exception as e:
        print(f"Authentication failed: {e}")
        exit(1)

    # --- 2. Generate README Content ---
    print("Generating README.md content...")
    readme_content = generate_readme_content(REPO_ID, BASE_MODEL_ID, ATTRIBUTION)
    # print("\nGenerated Content Preview:\n", readme_content[:500], "...") # Optional preview

    # --- 3. Upload README.md ---
    print(f"Uploading generated README.md to {REPO_ID}...")
    try:
        # Upload the content as a file-like object in memory (no temp file needed)
        from io import BytesIO
        readme_bytes = readme_content.encode('utf-8')
        api.upload_file(
            path_or_fileobj=BytesIO(readme_bytes),
            path_in_repo="README.md",  # Target path in the repository
            repo_id=REPO_ID,
            repo_type="model",
            commit_message="Update model card with detailed info and usage example"
        )
        print("README.md uploaded successfully!")
        print(f"Visit your repository at: https://huggingface.co/{REPO_ID}")
    except Exception as e:
        print(f"Failed to upload README.md: {e}")
        import traceback
        traceback.print_exc()
        print("\nUpload failed.")
# NOTE(review): The lines below are Hugging Face web-page chrome that was
# copied in along with the script (download counter, inference-provider
# banner, model-tree links). They are not Python and would raise a
# SyntaxError if left bare; preserved here as comments for reference.
#
#   Downloads last month: 3
#   Inference Providers (NEW): This model isn't deployed by any Inference
#   Provider. 🙋 Ask for provider support
#
#   Model tree for jinv2/safesky-ai-gemma-2b-sft
#     Base model: google/gemma-2b
#     Adapter (23700): this model
#
#   Space using jinv2/safesky-ai-gemma-2b-sft: 1