File size: 3,505 Bytes
27ba2b2
 
 
 
 
 
d994991
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27ba2b2
 
 
 
b1f9442
27ba2b2
 
b1f9442
27ba2b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b1f9442
27ba2b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d994991
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from fastapi.middleware.cors import CORSMiddleware

# -------------------------------
# Load model & tokenizer
# -------------------------------
# Inference device; fixed to CPU (HF Spaces free tier usually uses CPU).
device = "cpu"

# HF Hub model path; the tokenizer and the weights are both loaded from it.
model_name = "thedeba/deb-8B"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Load the weights and place the model on `device` in one step.
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

# -------------------------------
# FastAPI setup
# -------------------------------
app = FastAPI()

# CORS: any origin may call this API with any method and any header.
# Credentials are disabled on purpose: a wildcard origin must not be combined
# with credentialed requests, and none of the endpoints visible in this file
# read cookies or authorization headers. If credentialed requests are ever
# needed, list the allowed origins explicitly instead of using "*".
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# -------------------------------
# Pydantic model for input
# -------------------------------
# Request body schema accepted by the POST /generate endpoint.
class Query(BaseModel):
    # Prompt text; the generate endpoint hands it to the tokenizer unchanged.
    text: str

# -------------------------------
# Single generate endpoint
# -------------------------------
@app.post("/generate")
def generate(query: Query):
    """Generate text for the submitted prompt.

    Args:
        query: Request body; ``query.text`` is tokenized as-is and used as
            the model input.

    Returns:
        A dict with a single ``"response"`` key holding the decoded first
        sequence returned by ``model.generate``, with special tokens removed.
    """
    # Tokenize the prompt into PyTorch tensors and move them to `device`.
    inputs = tokenizer(query.text, return_tensors="pt").to(device)

    # `temperature` only influences sampling-based decoding, so sampling is
    # enabled explicitly; otherwise the 0.8 setting would have no effect.
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.8,
    )

    # A single prompt was submitted, so only the first sequence is decoded.
    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"response": response_text}

# -------------------------------
# Root endpoint
# -------------------------------
@app.get("/")
def root():
    # Static payload that lets a caller confirm the service is up.
    status_payload = {"status": "API is running!"}
    return status_payload

# -------------------------------
# Run locally (optional)
# -------------------------------
if __name__ == "__main__":
    import uvicorn

    # Listen on every interface, port 7860.
    # NOTE(review): presumably the port the hosting platform expects — confirm.
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)



"""from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from fastapi.middleware.cors import CORSMiddleware


# -------------------------------
# Load model & tokenizer from HF Hub
# -------------------------------
model_name = "thedeba/deb-8B"  # HF Hub model path
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
device = "auto"  # Spaces free tier uses CPU; you can switch to "cuda" if GPU granted
#model.to(device)

# -------------------------------
# FastAPI setup
# -------------------------------
app = FastAPI()


app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # or ["https://<username>.github.io"]
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)



class Query(BaseModel):
    text: str

@app.post("/generate")
def generate(query: Query):
    messages = [{"role": "user", "content": query.text}]

    # Convert to model input using chat template
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(device)

    # Generate
    outputs = model.generate(
        input_ids=inputs,
        max_new_tokens=200,
        use_cache=True,
        temperature=0.8,
        min_p=0.1,
    )

    # Decode & extract assistant response
    output_string = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    response = output_string.split("assistant")[-1].strip()
    return {"response": response}

@app.get("/")
def root():
    return {"deb": "API is running!"}

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)"""