# Spaces status: Runtime error
# File size: 1,184 Bytes
import asyncio
import os
import threading

from fastapi import FastAPI
from pydantic import BaseModel
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
)
# Hub identifier shared by the config, model and tokenizer loads below.
MODEL_ID = "devops-bda/Abap"

# Fetch the checkpoint's configuration and strip any quantization settings
# stored with it, so the model is loaded without them. The attribute is
# deleted outright rather than being set to None.
config = AutoConfig.from_pretrained(MODEL_ID)
if hasattr(config, "quantization_config"):
    delattr(config, "quantization_config")

# Instantiate the model from the adjusted configuration.
# device_map="auto" leaves device placement to the library (original intent:
# let the model load cleanly on a CPU-only host).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    config=config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Bundle model and tokenizer into one text-generation callable that the
# request handlers use.
text_gen_pipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
)

app = FastAPI()
class InputData(BaseModel):
    """Request body schema for the ``/predict`` endpoint."""

    # Prompt text that is handed to the text-generation pipeline.
    input_text: str
@app.get("/health")
async def health_check():
    """Return a fixed status payload for health probes.

    Returns:
        A dict with ``"status"`` set to ``"ok"`` and a short
        human-readable ``"message"``.
    """
    payload = {
        "status": "ok",
        "message": "Model is ready",
    }
    return payload
# Serializes generation: the shared pipeline is only ever driven by one worker
# thread at a time, matching the one-request-at-a-time behaviour it had when
# the handler ran the pipeline inline.
_GENERATION_LOCK = threading.Lock()


def _generate(prompt: str):
    """Run the text-generation pipeline on *prompt* while holding the lock."""
    with _GENERATION_LOCK:
        # NOTE(review): per the Transformers generation docs, max_length
        # bounds prompt tokens plus generated tokens together - confirm that
        # 500 is the intended total budget.
        return text_gen_pipeline(prompt, max_length=500)


@app.post("/predict")
async def predict(data: InputData):
    """Generate text for the submitted prompt.

    Args:
        data: Request body carrying the prompt in ``input_text``.

    Returns:
        A dict with a single ``"output"`` key holding whatever the
        text-generation pipeline returned for the prompt.
    """
    # The pipeline call is blocking. Running it directly inside this
    # coroutine would stall the event loop - and with it every other request,
    # including /health - for the whole generation. Hand it to the default
    # executor and await the result instead.
    loop = asyncio.get_running_loop()
    output = await loop.run_in_executor(None, _generate, data.input_text)
    return {"output": output}