# sdlc-agent/src/tools/ask_model.py
import modal

app = modal.App("ask-finetuned-model")

# Persisted volume that holds the fine-tuned LoRA checkpoint.
vol_checkpoints = modal.Volume.from_name("model-checkpoints")

# CUDA devel base image with Unsloth and its dependencies installed.
image = (
    modal.Image.from_registry("nvidia/cuda:12.1.1-devel-ubuntu22.04", add_python="3.10")
    .apt_install("git")
    .run_commands(
        "pip install --upgrade pip",
        "pip install packaging ninja psutil",
        "pip install unsloth_zoo",
        "pip install torchvision",
        "pip install xformers trl peft accelerate bitsandbytes scipy huggingface_hub protobuf sentencepiece einops",
        "pip install --no-deps 'unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git'",
    )
    .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
)
@app.function(
    image=image,
    volumes={"/data/checkpoints": vol_checkpoints},
    gpu="A10G",
    timeout=600,
)
def ask_question(question: str, context: str = "Context: Japan Census data.") -> str:
    from unsloth import FastLanguageModel

    print("Loading model...")
    model_path = "/data/checkpoints/phi3-census-lora"
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_path,
        max_seq_length=2048,
        dtype=None,         # let Unsloth auto-detect the dtype
        load_in_4bit=True,  # 4-bit quantization keeps the model within A10G memory
    )
    # Switch the model into Unsloth's optimized inference mode.
    FastLanguageModel.for_inference(model)
    # Alpaca-style prompt template matching the fine-tuning format.
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
"""
    inputs = tokenizer(
        [alpaca_prompt.format(question, context)],
        return_tensors="pt",
    ).to("cuda")

    # Note: temperature only takes effect when do_sample=True; with the
    # default greedy decoding it is ignored by transformers.
    outputs = model.generate(**inputs, max_new_tokens=150, use_cache=True, temperature=0.1)
    response = tokenizer.batch_decode(outputs)[0]

    # Extract only the generated answer. The source was truncated here; the
    # split token is assumed to be the EOS marker, so tokenizer.eos_token is
    # used in its place.
    if "### Response:\n" in response:
        answer = response.split("### Response:\n")[1].split(tokenizer.eos_token)[0].strip()
    else:
        answer = response
    return answer
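
# A minimal local entrypoint sketch so the tool can be exercised with
# `modal run ask_model.py` (assumption: this entrypoint is not part of the
# original file, and the default question is illustrative only).
@app.local_entrypoint()
def main(question: str = "What was the population of Japan in 2020?"):
    answer = ask_question.remote(question)
    print(answer)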