# sp500llm / app.py — Hugging Face Space serving a fine-tuned Llama-2 forecaster.
# Author: mdot77 · revision af46622 ("endpoints").
import os
from huggingface_hub import login
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
from peft import PeftModel
import torch
import json
# ── 0) Hugging Face login ────────────────────────────────────────────────────────
# The gated Llama-2 base weights require an authenticated Hub session, so fail
# fast with a clear message when the token is missing (or empty).
if not (hf_token := os.environ.get("HF_TOKEN")):
    raise ValueError("HF_TOKEN environment variable is not set.")
login(token=hf_token)
# ── 1) Load your model ───────────────────────────────────────────────────────────
# Base checkpoint (gated; requires the login above) plus the LoRA adapter
# produced by fine-tuning for the forecasting task.
base_model_id = "meta-llama/Llama-2-7b-chat-hf"
adapter_id = "mdot77/fingpt-llama2-7b-forecaster-finetuned"

tokenizer = AutoTokenizer.from_pretrained(base_model_id, use_fast=True)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map="auto",  # let accelerate place layers on available devices
    # NOTE(review): load_in_8bit= is deprecated in recent transformers in
    # favor of quantization_config=BitsAndBytesConfig(...) — confirm the
    # pinned transformers version before changing.
    load_in_8bit=True, # save VRAM
)
# Attach the PEFT/LoRA adapter weights on top of the base model.
model = PeftModel.from_pretrained(
    base_model,
    adapter_id,
    device_map="auto",
)
model.eval()  # inference mode: disables dropout etc.
# ── 2) Define your system‐instruction template ───────────────────────────────────
# System prompt injected into the <<SYS>> section of every request; it pins the
# model to a strict JSON-only output contract. This is runtime text consumed by
# the model — do not edit without re-testing downstream JSON parsing in infer().
SYSTEM = """You are a portfolio optimization assistant.
For a given stock snapshot, recommend how the allocation should be adjusted.
Your response MUST be valid JSON matching this schema:
{
"ticker": "<string>",
"snapshot": "<YYYY-MM-DD>",
"verdict": "<Increase|Decrease|Hold|Add|Remove>",
"new_alloc_pct": <number>,
"reasoning": "<short explanation>"
}
Do not include any extra keys or commentary. At the end, emit only the JSON."""
# ── 3) Inference function ───────────────────────────────────────────────────────
def parse_model_reply(reply: str) -> str:
    """Pretty-print the JSON object contained in *reply*, tolerating extra text.

    Internal helper for infer(). Tries the whole reply first, then the
    outermost ``{...}`` span, since chat models sometimes wrap the JSON in
    commentary despite the system prompt.

    Args:
        reply: Decoded model output.

    Returns:
        ``json.dumps(..., indent=2)`` of the first candidate that parses,
        otherwise the reply unchanged.
    """
    candidates = [reply]
    start, end = reply.find("{"), reply.rfind("}")
    if 0 <= start < end:
        candidates.append(reply[start:end + 1])
    for candidate in candidates:
        try:
            return json.dumps(json.loads(candidate), indent=2)
        except json.JSONDecodeError:
            continue
    return reply


def infer(data_json: str) -> str:
    """Run the fine-tuned forecaster on a single snapshot.

    Builds a Llama-2 chat prompt ([INST] <<SYS>> ... [/INST]) around the
    snapshot and generates greedily (temperature 0) for determinism.

    Args:
        data_json: Snapshot data serialized as a JSON string.

    Returns:
        Pretty-printed JSON when the model's reply contains a parseable JSON
        object, otherwise the raw decoded reply.
    """
    # Hardcoded generation parameters (greedy decoding for determinism).
    max_new_tokens = 256
    temperature = 0.0
    top_p = 1.0

    prompt = (
        "[INST] <<SYS>>\n"
        f"{SYSTEM}\n"
        "<</SYS>>\n\n"
        "DATA:\n"
        f"{data_json}\n"
        "[/INST]"
    )
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)

    do_sample = temperature > 0
    gen_cfg = GenerationConfig(
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        # Only pass sampling knobs when actually sampling: recent transformers
        # releases warn (or raise) on temperature=0.0 with do_sample=False.
        temperature=temperature if do_sample else None,
        top_p=top_p if do_sample else None,
        use_cache=True,
    )

    # Inference only — skip autograd bookkeeping to save memory.
    with torch.inference_mode():
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            generation_config=gen_cfg,
        )

    # Strip the echoed prompt; keep only the newly generated tokens.
    new_tokens = outputs[0, inputs["input_ids"].shape[-1]:]
    reply = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    return parse_model_reply(reply)
# ── 4) API wrapper function for the main prediction ──────────────────────────────
def predict_api_wrapper(request_data):
    """Adapt raw API payloads to the infer() function.

    Accepts either a Gradio-style ``{"data": [snapshot_json]}`` dict or a bare
    snapshot JSON string, runs inference, and returns a dict: the parsed model
    output, ``{"raw_output": ...}`` for non-JSON replies, or ``{"error": ...}``.
    """
    try:
        # Normalize the incoming payload down to one snapshot string.
        if isinstance(request_data, str):
            snapshot = request_data
        elif isinstance(request_data, dict) and "data" in request_data and len(request_data["data"]) > 0:
            snapshot = request_data["data"][0]
        else:
            return {"error": "No data provided"}

        reply = infer(snapshot)
        if not isinstance(reply, str):
            # Already structured — pass it through untouched.
            return reply
        try:
            return json.loads(reply)
        except json.JSONDecodeError:
            # Model emitted non-JSON text; surface it verbatim.
            return {"raw_output": reply}
    except Exception as exc:
        return {"error": str(exc)}
# ── 5) Gradio interface with API endpoints ───────────────────────────────────────
# Two tabs: a human-facing inference form, and a tab that exercises the
# dict-shaped API payload path via predict_api_wrapper.
with gr.Blocks(title="Portfolio-Optimizer Inference") as iface:
    gr.Markdown("# Portfolio-Optimizer Inference")
    gr.Markdown("Paste your snapshot JSON and get back a single-JSON allocation verdict.")
    with gr.Tab("Inference"):
        # Direct path: the textbox value is the snapshot JSON passed to infer().
        input_text = gr.Textbox(
            label="Snapshot data (JSON)",
            lines=15,
            value='{"ticker": "COIN", "snapshot": "2022-06-18", "previous_allocation_pct": 0.05}'
        )
        output_json = gr.JSON(label="Model output")
        predict_btn = gr.Button("Predict")
        predict_btn.click(fn=infer, inputs=input_text, outputs=output_json)
    with gr.Tab("API Testing"):
        gr.Markdown("## API Testing Interface")
        gr.Markdown("Use this to test the API functionality. The main API endpoint is available at `/api/predict/`")
        # Wrapped path: the textbox holds a {"data": [...]} envelope with the
        # snapshot JSON escaped inside it.
        api_input = gr.Textbox(
            label="API Request Data (JSON)",
            lines=10,
            value='{"data": ["{\\"ticker\\": \\"AAPL\\", \\"snapshot\\": \\"2025-01-01\\", \\"previous_allocation_pct\\": 0.05}"]}'
        )
        api_output = gr.JSON(label="API Response")
        api_btn = gr.Button("Test API")
        api_btn.click(fn=predict_api_wrapper, inputs=api_input, outputs=api_output)
# ── 6) Launch for Hugging Face Spaces ───────────────────────────────────────────
if __name__ == "__main__":
    # For Hugging Face Spaces, use the default launch
    # The API endpoints will be available at /api/predict/ automatically
    iface.launch()