api-smollm135m

Sleeping

App Files Files Community

api-smollm135m / app.py

khurrameycon

Update app.py

be89a8a verified 12 months ago

raw

history blame

2.62 kB

	from fastapi import FastAPI, HTTPException
	from pydantic import BaseModel
	from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
	from safetensors.torch import load_file
	import torch

	# Request-body schema for the text-generation endpoint.
	# (Documented with comments rather than a class docstring so the schema
	# that pydantic/FastAPI publish for this model stays unchanged.)
	class ModelInput(BaseModel):
	    # Text handed to the model as the generation prompt.
	    prompt: str

	    # Cap on newly generated tokens; optional, defaults to 50 tokens.
	    max_new_tokens: int = 50

	# Initialize the FastAPI application that the route decorators below attach to.
	app = FastAPI()

	# Identifier of the base model; used for both the tokenizer and the model.
	base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct" # Base model
	# Path to the adapter weights. This value is later passed unchanged to
	# safetensors' load_file() via load_adapter_weights().
	# NOTE(review): the value looks like a Hub URL fragment
	# ("<repo>/resolve/main/<file>") rather than a local file path -- confirm
	# that a file really exists at this relative location at runtime.
	adapter_weights_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs/resolve/main/adapter_model.safetensors"

	# Load the tokenizer and the causal-LM weights for the base model.
	# Both run at import time, i.e. once when the module is loaded.
	tokenizer = AutoTokenizer.from_pretrained(base_model_path)
	model = AutoModelForCausalLM.from_pretrained(base_model_path)

	# Load the adapter weights
	def load_adapter_weights(model, adapter_weights_path):
	    """Load a safetensors checkpoint into ``model`` and return the model.

	    The tensors are read with ``load_file`` and applied through
	    ``model.load_state_dict(..., strict=False)``. With ``strict=False``,
	    checkpoint tensors whose names do not exist in ``model`` are skipped
	    instead of raising, so a checkpoint that matches nothing would be a
	    silent no-op. To make that visible, a ``RuntimeWarning`` is emitted
	    whenever at least one checkpoint tensor was not applied.

	    Args:
	        model: Object exposing ``load_state_dict``; it is the object that
	            receives the weights and is returned.
	        adapter_weights_path: Path handed to ``load_file``.

	    Returns:
	        The same ``model`` object that was passed in.
	    """
	    # Function-scope import keeps this block self-contained.
	    import warnings

	    adapter_weights = load_file(adapter_weights_path)
	    load_result = model.load_state_dict(adapter_weights, strict=False)

	    # load_state_dict reports the checkpoint keys it could not place.
	    # getattr keeps the helper tolerant of model objects that return nothing.
	    skipped = list(getattr(load_result, "unexpected_keys", ()))
	    if skipped:
	        applied = len(adapter_weights) - len(skipped)
	        # NOTE(review): if this checkpoint is a PEFT/LoRA adapter, its tensor
	        # names presumably differ from the base model's and it would need to
	        # be loaded through PEFT instead -- confirm.
	        warnings.warn(
	            f"{len(skipped)} of {len(adapter_weights)} tensors from "
	            f"{adapter_weights_path!r} did not match any model parameter "
	            f"and were ignored ({applied} applied); first ignored key: "
	            f"{skipped[0]!r}",
	            RuntimeWarning,
	            stacklevel=2,
	        )
	    return model

	# Apply adapter weights to the model (runs once, at import time).
	# The helper returns the model it was given, so the name is simply rebound.
	model = load_adapter_weights(model, adapter_weights_path)

	# Ensure the model is in evaluation mode before serving requests.
	model.eval()

	# Initialize a text-generation pipeline around the same model and tokenizer.
	# NOTE: the endpoints visible in this file call generate_response() directly
	# and do not reference `generator`.
	generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

	# Helper function to generate a response
	def generate_response(model, tokenizer, instruction, max_new_tokens=128):
	    """Generate text for ``instruction`` and return the decoded string.

	    The instruction is tokenized as given, every resulting tensor is moved
	    to ``model.device``, and ``model.generate`` is called with sampling
	    enabled (``temperature=0.7``, ``top_p=0.9``, ``do_sample=True``). The
	    first returned sequence is decoded with special tokens skipped.

	    Args:
	        model: Object exposing ``device`` and ``generate(**inputs, ...)``.
	        tokenizer: Callable returning a mapping of tensors, and exposing
	            ``decode``.
	        instruction: Prompt text passed to the tokenizer.
	        max_new_tokens: Forwarded to ``model.generate``; defaults to 128.

	    Returns:
	        The decoded text of the first generated sequence.

	    Raises:
	        ValueError: If tokenization, generation or decoding fails. The
	            original exception is attached as ``__cause__``.
	    """
	    try:
	        encoded = tokenizer(instruction, return_tensors="pt")
	        # Inputs must live on the same device as the model's weights.
	        on_device = {
	            name: tensor.to(model.device) for name, tensor in encoded.items()
	        }
	        sequences = model.generate(
	            **on_device,
	            max_new_tokens=max_new_tokens,
	            temperature=0.7,
	            top_p=0.9,
	            do_sample=True,
	        )
	        # NOTE(review): the whole first sequence is decoded; for a causal LM
	        # this presumably includes the prompt text as well -- confirm.
	        return tokenizer.decode(sequences[0], skip_special_tokens=True)
	    except Exception as e:
	        # Chain explicitly so the root cause stays attached to the wrapper.
	        raise ValueError(f"Error generating response: {e}") from e

	# POST /generate: feeds the request's prompt and token limit to
	# generate_response() and wraps the resulting text in a JSON object.
	@app.post("/generate")
	def generate_text(input: ModelInput):
	    """API endpoint to generate text."""
	    try:
	        generated = generate_response(
	            model=model,
	            tokenizer=tokenizer,
	            instruction=input.prompt,
	            max_new_tokens=input.max_new_tokens,
	        )
	    except Exception as e:
	        # Any failure in the helper is reported to the client as HTTP 500,
	        # with the exception message as the detail.
	        raise HTTPException(status_code=500, detail=str(e))
	    return {"generated_text": generated}

	# GET /: returns a fixed welcome message as a JSON object.
	# (Kept as a comment rather than a docstring so the route's published
	# description does not change.)
	@app.get("/")
	def root():
	    greeting = "Welcome to the Hugging Face Model API with Adapter Support!"
	    return {"message": greeting}