# My_Two_Modeles/main.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login
import torch
import os
# Initialize FastAPI app
app = FastAPI(title="AI Model API", description="API for Description and UML Generation")
# Global variables for models
description_model = None
description_tokenizer = None

def authenticate_huggingface():
    """Authenticate with Hugging Face using a token from the environment."""
    try:
        token = os.getenv('HUGGING_FACE_HUB_TOKEN') or os.getenv('HF_TOKEN')
        if token:
            print("Found Hugging Face token, authenticating...")
            login(token=token)
            print("✅ Successfully authenticated with Hugging Face!")
            return True
        else:
            print("❌ No Hugging Face token found in environment variables")
            return False
    except Exception as e:
        print(f"❌ Authentication failed: {e}")
        return False

def load_models():
    """Load the description model, trying PEFT/LoRA first and falling back to the base model."""
    global description_model, description_tokenizer
    try:
        print("Loading models...")
        if not authenticate_huggingface():
            print("⚠️ Warning: Not authenticated with Hugging Face.")

        # Model configuration
        fine_tuned_model = "chaymaemerhrioui/Brain_Model_ACC_unsloth"
        base_model = "unsloth/mistral-7b-bnb-4bit"
        print(f"Loading fine-tuned model: {fine_tuned_model}")
        print(f"Base model: {base_model}")

        # Method 1: try loading as a PEFT/LoRA adapter
        try:
            print("Attempting PEFT/LoRA loading...")
            from peft import PeftModel, AutoPeftModelForCausalLM

            # Option 1a: AutoPeftModelForCausalLM resolves the base model automatically
            try:
                print("Using AutoPeftModelForCausalLM...")
                description_model = AutoPeftModelForCausalLM.from_pretrained(
                    fine_tuned_model,
                    torch_dtype=torch.float16,
                    device_map="auto",
                    trust_remote_code=True
                )
                # peft_config is a dict keyed by adapter name, so look up the active adapter
                base_model_name = description_model.peft_config[
                    description_model.active_adapter
                ].base_model_name_or_path
                description_tokenizer = AutoTokenizer.from_pretrained(base_model_name)
                print("✅ Successfully loaded with AutoPeftModelForCausalLM!")
            except Exception as e1:
                print(f"AutoPeftModelForCausalLM failed: {e1}")
                # Option 1b: manual PEFT loading (base model first, then the adapter)
                print("Trying manual PEFT loading...")
                print("Loading base model...")
                description_tokenizer = AutoTokenizer.from_pretrained(base_model)
                base_model_obj = AutoModelForCausalLM.from_pretrained(
                    base_model,
                    torch_dtype=torch.float16,
                    device_map="auto"
                )
                print("Loading PEFT adapter...")
                description_model = PeftModel.from_pretrained(
                    base_model_obj,
                    fine_tuned_model
                )
                print("✅ Successfully loaded with manual PEFT!")
        except Exception as e:
            print(f"PEFT loading failed: {e}")
            # Method 2: try loading as a regular fine-tuned model with the base model tokenizer
            try:
                print("Attempting regular fine-tuned model loading...")
                # Use the base model tokenizer (often works better for fine-tuned models)
                print("Loading tokenizer from base model...")
                description_tokenizer = AutoTokenizer.from_pretrained(base_model)
                print("Loading fine-tuned model...")
                description_model = AutoModelForCausalLM.from_pretrained(
                    fine_tuned_model,
                    torch_dtype=torch.float16,
                    device_map="auto",
                    trust_remote_code=True
                )
                print("✅ Successfully loaded as regular fine-tuned model!")
            except Exception as e2:
                print(f"Regular fine-tuned loading failed: {e2}")
                # Method 3: fall back to the base model only
                print("Loading base model as fallback...")
                description_tokenizer = AutoTokenizer.from_pretrained(base_model)
                description_model = AutoModelForCausalLM.from_pretrained(
                    base_model,
                    torch_dtype=torch.float16,
                    device_map="auto"
                )
                print("⚠️ Loaded base model only - fine-tuning not applied!")

        # Make sure the tokenizer has a pad token for generation
        if description_tokenizer.pad_token is None:
            description_tokenizer.pad_token = description_tokenizer.eos_token
        print("✅ Model loading completed successfully!")
    except Exception as e:
        print(f"❌ All loading methods failed: {e}")
        raise

# Load models at startup
# (on_event is deprecated in newer FastAPI releases in favor of lifespan handlers, but still works)
@app.on_event("startup")
async def startup_event():
    try:
        load_models()
    except Exception as e:
        print(f"❌ Model loading failed during startup: {e}")
        print("API will start but model endpoints will return 503 errors")

# Pydantic models for request validation
class DescriptionItem(BaseModel):
    prompt: str

@app.get("/")
async def root():
    return {"message": "AI Model API is running"}

@app.get("/health")
async def health_check():
    model_status = {
        "description_model": description_model is not None,
        "description_tokenizer": description_tokenizer is not None
    }
    return {
        "status": "healthy" if all(model_status.values()) else "partial",
        "models": model_status
    }
@app.post("/generate_description")
async def generate_description(item: DescriptionItem):
if description_model is None or description_tokenizer is None:
raise HTTPException(
status_code=503,
detail="Description model not available"
)
try:
# Tokenize the input prompt
inputs = description_tokenizer(
item.prompt,
return_tensors="pt",
padding=True,
truncation=True,
max_length=512
)
# Move inputs to model device
if hasattr(description_model, 'device'):
inputs = {k: v.to(description_model.device) for k, v in inputs.items()}
# Generate response
with torch.no_grad():
outputs = description_model.generate(
**inputs,
max_new_tokens=256,
do_sample=True,
temperature=0.7,
top_p=0.9,
pad_token_id=description_tokenizer.eos_token_id,
repetition_penalty=1.1
)
# Decode only the new tokens
input_length = inputs['input_ids'].shape[1]
description = description_tokenizer.decode(
outputs[0][input_length:],
skip_special_tokens=True
)
return {"description": description.strip()}
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Error generating description: {str(e)}"
)