Hugging Face Spaces: this Space's build status is "Runtime error" (the application failed at startup).
import os

from fastapi import FastAPI
from pydantic import BaseModel
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
)

# Single source of truth for the Hub model id (was repeated three times).
MODEL_ID = "devops-bda/Abap"

# Load the configuration and strip any quantization config so the model is
# loaded with full-precision weights (4-bit/8-bit quantized loading needs
# GPU-only kernels and fails on a CPU Space).
config = AutoConfig.from_pretrained(MODEL_ID)
if hasattr(config, "quantization_config"):
    del config.quantization_config  # delete it rather than set it to None

# NOTE(review): device_map="auto" requires the `accelerate` package and does
# NOT force CPU placement — it will shard onto any visible GPU. If this Space
# is CPU-only, confirm `accelerate` is installed or drop device_map entirely.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    config=config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Text-generation pipeline shared by the request handlers below.
text_gen_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

app = FastAPI()
class InputData(BaseModel):
    """Request body for the text-generation endpoint."""

    # Prompt text passed verbatim to the text-generation pipeline.
    input_text: str
@app.get("/health")
async def health_check():
    """Liveness probe: report that the model finished loading.

    Fix: the original coroutine had no route decorator, so FastAPI never
    registered it and the endpoint was unreachable.
    """
    return {"status": "ok", "message": "Model is ready"}
@app.post("/predict")
async def predict(data: InputData):
    """Generate text from ``data.input_text`` and return it as JSON.

    Fix: the original coroutine had no route decorator, so FastAPI never
    registered it and the endpoint was unreachable.
    """
    # max_length=500 bounds the TOTAL token count (prompt + generated text).
    output = text_gen_pipeline(data.input_text, max_length=500)
    return {"output": output}