# Spaces: Runtime error
# (status banner captured from the hosting Hugging Face Space page — not program code)
| from fastapi import FastAPI, HTTPException | |
| from pydantic import BaseModel | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| from huggingface_hub import login | |
| import torch | |
| import os | |
# Initialize FastAPI app
app = FastAPI(title="AI Model API", description="API for Description and UML Generation")
# Global variables for models
# Both stay None until load_models() succeeds; endpoints must check for None
# and answer 503 when the model is unavailable.
description_model = None
description_tokenizer = None
def authenticate_huggingface():
    """Log in to the Hugging Face Hub using a token from the environment.

    Checks HUGGING_FACE_HUB_TOKEN first, then HF_TOKEN. Returns True on a
    successful login, False when no token is set or the login call raises.
    """
    try:
        hub_token = os.getenv('HUGGING_FACE_HUB_TOKEN') or os.getenv('HF_TOKEN')
        # Guard clause: nothing to do without a token.
        if not hub_token:
            print("β No Hugging Face token found in environment variables")
            return False
        print("Found Hugging Face token, authenticating...")
        login(token=hub_token)
        print("β Successfully authenticated with Hugging Face!")
        return True
    except Exception as e:
        # Best-effort: report the failure and let the caller decide.
        print(f"β Authentication failed: {e}")
        return False
def load_models():
    """Load the description model and tokenizer into the module globals.

    Tries three strategies in order, each falling back to the next on failure:
      1. PEFT/LoRA loading — AutoPeftModelForCausalLM first, then a manual
         base-model + PeftModel adapter load.
      2. Loading the fine-tuned repo as a regular causal-LM checkpoint with
         the base model's tokenizer.
      3. Loading the base model only (fine-tuning NOT applied).

    Raises:
        Exception: re-raises the last error if every strategy fails.
    """
    global description_model, description_tokenizer
    try:
        print("Loading models...")
        # Authentication is best-effort: public repos still load without it,
        # private ones will fail further down.
        if not authenticate_huggingface():
            print("β οΈ Warning: Not authenticated with Hugging Face.")
        # Model configuration
        fine_tuned_model = "chaymaemerhrioui/Brain_Model_ACC_unsloth"
        base_model = "unsloth/mistral-7b-bnb-4bit"
        print(f"Loading fine-tuned model: {fine_tuned_model}")
        print(f"Base model: {base_model}")
        # Method 1: Try loading as PEFT/LoRA adapter
        try:
            print("Attempting PEFT/LoRA loading...")
            # Imported lazily so the service can still start when peft is absent.
            from peft import PeftModel, AutoPeftModelForCausalLM
            # Option 1a: Use AutoPeftModelForCausalLM (handles everything automatically)
            try:
                print("Using AutoPeftModelForCausalLM...")
                description_model = AutoPeftModelForCausalLM.from_pretrained(
                    fine_tuned_model,
                    torch_dtype=torch.float16,
                    device_map="auto",
                    trust_remote_code=True
                )
                # Get the base model tokenizer
                # NOTE(review): in recent PEFT versions peft_config is a dict keyed
                # by adapter name, so this attribute access likely raises and falls
                # through to option 1b — confirm (peft_config["default"] may be meant).
                base_model_name = description_model.peft_config.base_model_name_or_path
                description_tokenizer = AutoTokenizer.from_pretrained(base_model_name)
                print("β Successfully loaded with AutoPeftModelForCausalLM!")
            except Exception as e1:
                print(f"AutoPeftModelForCausalLM failed: {e1}")
                # Option 1b: Manual PEFT loading
                print("Trying manual PEFT loading...")
                print("Loading base model...")
                description_tokenizer = AutoTokenizer.from_pretrained(base_model)
                base_model_obj = AutoModelForCausalLM.from_pretrained(
                    base_model,
                    torch_dtype=torch.float16,
                    device_map="auto"
                )
                print("Loading PEFT adapter...")
                description_model = PeftModel.from_pretrained(
                    base_model_obj,
                    fine_tuned_model
                )
                print("β Successfully loaded with manual PEFT!")
        except Exception as e:
            print(f"PEFT loading failed: {e}")
            # Method 2: Try loading as regular fine-tuned model with base model tokenizer
            try:
                print("Attempting regular fine-tuned model loading...")
                # Use base model tokenizer (often works better for fine-tuned models)
                print("Loading tokenizer from base model...")
                description_tokenizer = AutoTokenizer.from_pretrained(base_model)
                print("Loading fine-tuned model...")
                description_model = AutoModelForCausalLM.from_pretrained(
                    fine_tuned_model,
                    torch_dtype=torch.float16,
                    device_map="auto",
                    trust_remote_code=True
                )
                print("β Successfully loaded as regular fine-tuned model!")
            except Exception as e2:
                print(f"Regular fine-tuned loading failed: {e2}")
                # Method 3: Load base model only (as fallback)
                print("Loading base model as fallback...")
                description_tokenizer = AutoTokenizer.from_pretrained(base_model)
                description_model = AutoModelForCausalLM.from_pretrained(
                    base_model,
                    torch_dtype=torch.float16,
                    device_map="auto"
                )
                print("β οΈ Loaded base model only - fine-tuning not applied!")
        # Set up tokenizer padding
        # Mistral-style tokenizers ship without a pad token; reuse EOS so
        # batched generation with padding=True works.
        if description_tokenizer.pad_token is None:
            description_tokenizer.pad_token = description_tokenizer.eos_token
        print("β Model loading completed successfully!")
    except Exception as e:
        print(f"β All loading methods failed: {e}")
        raise e
# Load models at startup
# Fix: the handler was never registered with FastAPI, so load_models() never
# ran and every model endpoint answered 503. Register it as a startup hook.
@app.on_event("startup")
async def startup_event():
    """FastAPI startup hook: load the models once before serving requests.

    Failures are swallowed deliberately so the API still starts and the model
    endpoints report 503 instead of the whole service crashing on boot.
    """
    try:
        load_models()
    except Exception as e:
        print(f"β Model loading failed during startup: {e}")
        print("API will start but model endpoints will return 503 errors")
# Pydantic models for request validation
class DescriptionItem(BaseModel):
    """Request body for the description-generation endpoint."""
    # prompt: free-form text fed to the language model as the generation prompt
    prompt: str
# Fix: no route decorator was attached, so this handler was unreachable.
# "/" assumed for the landing endpoint — TODO confirm original path.
@app.get("/")
async def root():
    """Landing endpoint confirming the service is up."""
    return {"message": "AI Model API is running"}
# Fix: no route decorator was attached, so this handler was unreachable.
# "/health" assumed — TODO confirm original path.
@app.get("/health")
async def health_check():
    """Report whether the model and tokenizer have finished loading.

    Returns "healthy" when both globals are set, "partial" otherwise, plus a
    per-component breakdown so callers can see what is missing.
    """
    model_status = {
        "description_model": description_model is not None,
        "description_tokenizer": description_tokenizer is not None
    }
    return {
        "status": "healthy" if all(model_status.values()) else "partial",
        "models": model_status
    }
async def generate_description(item: DescriptionItem):
    """Generate a text description for the prompt in *item*.

    Returns:
        dict: {"description": <generated text, stripped>}.

    Raises:
        HTTPException: 503 when the model/tokenizer are not loaded,
            500 on any failure during tokenization or generation.

    NOTE(review): no @app.post(...) decorator is attached, so this endpoint
    is never registered with FastAPI — confirm the intended route.
    """
    if description_model is None or description_tokenizer is None:
        raise HTTPException(
            status_code=503,
            detail="Description model not available"
        )
    try:
        # Tokenize the input prompt
        inputs = description_tokenizer(
            item.prompt,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512
        )
        # Move inputs to model device
        # hasattr guard: a sharded/PEFT-wrapped model may not expose a single
        # .device attribute, in which case inputs are left on CPU.
        if hasattr(description_model, 'device'):
            inputs = {k: v.to(description_model.device) for k, v in inputs.items()}
        # Generate response
        with torch.no_grad():
            outputs = description_model.generate(
                **inputs,
                max_new_tokens=256,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                pad_token_id=description_tokenizer.eos_token_id,
                repetition_penalty=1.1
            )
        # Decode only the new tokens
        # (generate() returns prompt + continuation; slice off the prompt part)
        input_length = inputs['input_ids'].shape[1]
        description = description_tokenizer.decode(
            outputs[0][input_length:],
            skip_special_tokens=True
        )
        return {"description": description.strip()}
    except Exception as e:
        # Surface generation failures to the client as a 500 with the cause.
        raise HTTPException(
            status_code=500,
            detail=f"Error generating description: {str(e)}"
        )