Spaces: Sleeping
# Minimal FastAPI service that serves text generated by Gemma-2B-IT.
from fastapi import FastAPI

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Application instance picked up by the ASGI server (e.g. uvicorn app:app).
app = FastAPI()
# NOTE(review): this matches the HF Spaces FastAPI template's root route,
# which carries @app.get("/"); without a route decorator the function is
# never reachable over HTTP. Confirm the intended path if it differs.
@app.get("/")
def greet_json():
    """Health-check / landing endpoint returning a static JSON greeting."""
    return {"Hello": "World!"}
def say_hello(msg: str):
    """Generate a completion for *msg* with google/gemma-2b-it.

    Returns:
        dict: {"message": <decoded generation, special tokens included>}.
    """
    # Load the tokenizer/model once and cache them on the function object:
    # from_pretrained() downloads and materializes a ~2B-parameter model, so
    # re-loading it on every call (as the original did) is prohibitively slow.
    if not hasattr(say_hello, "_model"):
        print("model")
        say_hello._tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
        say_hello._model = AutoModelForCausalLM.from_pretrained(
            "google/gemma-2b-it",
            device_map="auto",
            torch_dtype=torch.bfloat16,
        )
    tokenizer = say_hello._tokenizer
    model = say_hello._model
    print("token & msg")
    # Send the inputs to wherever device_map placed the model. The original
    # pinned them to "cpu", which fails whenever the model lands on a GPU.
    input_ids = tokenizer(msg, return_tensors="pt").to(model.device)
    print("output")
    # inference_mode: no autograd bookkeeping during generation.
    with torch.inference_mode():
        outputs = model.generate(**input_ids, max_length=500)
    print("complete")
    return {"message": tokenizer.decode(outputs[0])}