File size: 1,184 Bytes
27c4f25
3cc890e
 
27c4f25
 
 
 
 
 
 
 
 
 
d07f16f
3cc890e
27c4f25
 
 
 
d07f16f
ae29859
27c4f25
 
 
 
3cc890e
 
 
 
 
 
 
 
 
156783f
3cc890e
 
27c4f25
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer, 
    pipeline, 
    AutoConfig
)

# Model repo id. Overridable via the MODEL_ID environment variable so a
# deployment can swap checkpoints without a code change; the default is the
# original hard-coded repo.
MODEL_ID = os.getenv("MODEL_ID", "devops-bda/Abap")

# Load the configuration and strip any quantization config if present, so the
# checkpoint loads as ordinary full-precision weights (e.g. on CPU-only hosts).
config = AutoConfig.from_pretrained(MODEL_ID)
if hasattr(config, "quantization_config"):
    del config.quantization_config  # Safely delete it instead of setting to None

# Load the model and tokenizer without 4-bit quantization
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    config=config,
    device_map="auto",  # Let accelerate decide placement (CPU or GPU as available)
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Create a text-generation pipeline with the loaded model and tokenizer
text_gen_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

# FastAPI application object; the route decorators below register endpoints on it.
app = FastAPI()

class InputData(BaseModel):
    """Request body for /predict: the prompt text to feed the model."""

    input_text: str

@app.get("/health")
async def health_check():
    """Liveness probe: report that the service and its model are ready."""
    payload = {"status": "ok", "message": "Model is ready"}
    return payload

@app.post("/predict")
def predict(data: InputData):
    """Generate text from the prompt in ``data.input_text``.

    Declared as a plain ``def`` (not ``async def``) on purpose: the pipeline
    call is a blocking, compute-heavy operation, and FastAPI executes sync
    endpoints in a worker thread so the event loop is not stalled for the
    duration of inference. The HTTP interface is unchanged.

    Returns the pipeline's raw output (a list of generated-text dicts)
    wrapped in an ``{"output": ...}`` envelope.
    """
    # max_length caps prompt + generated tokens combined at 500.
    output = text_gen_pipeline(data.input_text, max_length=500)
    return {"output": output}