# sp500llm / app.py — Hugging Face Space serving a fine-tuned Llama-2 forecaster.
# Author: mdot77 · revision af46622 ("endpoints").
import os
from huggingface_hub import login
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
from peft import PeftModel
import torch
import json
# ── 0) Hugging Face login ────────────────────────────────────────────────────────
# The gated Llama-2 base weights require an authenticated Hub session, so fail
# fast with a clear message when the token is missing (or empty).
if not (hf_token := os.environ.get("HF_TOKEN")):
    raise ValueError("HF_TOKEN environment variable is not set.")
login(token=hf_token)
# ── 1) Load your model ───────────────────────────────────────────────────────────
# Base checkpoint (gated; requires the login above) plus the LoRA adapter
# produced by fine-tuning for the forecasting task.
base_model_id = "meta-llama/Llama-2-7b-chat-hf"
adapter_id = "mdot77/fingpt-llama2-7b-forecaster-finetuned"

tokenizer = AutoTokenizer.from_pretrained(base_model_id, use_fast=True)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map="auto",  # let accelerate place layers on available devices
    # NOTE(review): load_in_8bit= is deprecated in recent transformers in
    # favor of quantization_config=BitsAndBytesConfig(...) — confirm the
    # pinned transformers version before changing.
    load_in_8bit=True, # save VRAM
)
# Attach the PEFT/LoRA adapter weights on top of the base model.
model = PeftModel.from_pretrained(
    base_model,
    adapter_id,
    device_map="auto",
)
model.eval()  # inference mode: disables dropout etc.
# ── 2) Define your system‐instruction template ───────────────────────────────────
# System prompt injected into the <<SYS>> section of every request; it pins the
# model to a strict JSON-only output contract. This is runtime text consumed by
# the model — do not edit without re-testing downstream JSON parsing in infer().
SYSTEM = """You are a portfolio optimization assistant.
For a given stock snapshot, recommend how the allocation should be adjusted.
Your response MUST be valid JSON matching this schema:
{
"ticker": "<string>",
"snapshot": "<YYYY-MM-DD>",
"verdict": "<Increase|Decrease|Hold|Add|Remove>",
"new_alloc_pct": <number>,
"reasoning": "<short explanation>"
}
Do not include any extra keys or commentary. At the end, emit only the JSON."""
# ── 3) Inference function ───────────────────────────────────────────────────────
def parse_model_reply(reply: str) -> str:
    """Pretty-print the JSON object contained in *reply*, tolerating extra text.

    Internal helper for infer(). Tries the whole reply first, then the
    outermost ``{...}`` span, since chat models sometimes wrap the JSON in
    commentary despite the system prompt.

    Args:
        reply: Decoded model output.

    Returns:
        ``json.dumps(..., indent=2)`` of the first candidate that parses,
        otherwise the reply unchanged.
    """
    candidates = [reply]
    start, end = reply.find("{"), reply.rfind("}")
    if 0 <= start < end:
        candidates.append(reply[start:end + 1])
    for candidate in candidates:
        try:
            return json.dumps(json.loads(candidate), indent=2)
        except json.JSONDecodeError:
            continue
    return reply


def infer(data_json: str) -> str:
    """Run the fine-tuned forecaster on a single snapshot.

    Builds a Llama-2 chat prompt ([INST] <<SYS>> ... [/INST]) around the
    snapshot and generates greedily (temperature 0) for determinism.

    Args:
        data_json: Snapshot data serialized as a JSON string.

    Returns:
        Pretty-printed JSON when the model's reply contains a parseable JSON
        object, otherwise the raw decoded reply.
    """
    # Hardcoded generation parameters (greedy decoding for determinism).
    max_new_tokens = 256
    temperature = 0.0
    top_p = 1.0

    prompt = (
        "[INST] <<SYS>>\n"
        f"{SYSTEM}\n"
        "<</SYS>>\n\n"
        "DATA:\n"
        f"{data_json}\n"
        "[/INST]"
    )
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)

    do_sample = temperature > 0
    gen_cfg = GenerationConfig(
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        # Only pass sampling knobs when actually sampling: recent transformers
        # releases warn (or raise) on temperature=0.0 with do_sample=False.
        temperature=temperature if do_sample else None,
        top_p=top_p if do_sample else None,
        use_cache=True,
    )

    # Inference only — skip autograd bookkeeping to save memory.
    with torch.inference_mode():
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            generation_config=gen_cfg,
        )

    # Strip the echoed prompt; keep only the newly generated tokens.
    new_tokens = outputs[0, inputs["input_ids"].shape[-1]:]
    reply = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    return parse_model_reply(reply)
# ── 4) API wrapper function for the main prediction ──────────────────────────────
def predict_api_wrapper(request_data):
    """Adapt raw API payloads to the infer() function.

    Accepts either a Gradio-style ``{"data": [snapshot_json]}`` dict or a bare
    snapshot JSON string, runs inference, and returns a dict: the parsed model
    output, ``{"raw_output": ...}`` for non-JSON replies, or ``{"error": ...}``.
    """
    try:
        # Normalize the incoming payload down to one snapshot string.
        if isinstance(request_data, str):
            snapshot = request_data
        elif isinstance(request_data, dict) and "data" in request_data and len(request_data["data"]) > 0:
            snapshot = request_data["data"][0]
        else:
            return {"error": "No data provided"}

        reply = infer(snapshot)
        if not isinstance(reply, str):
            # Already structured — pass it through untouched.
            return reply
        try:
            return json.loads(reply)
        except json.JSONDecodeError:
            # Model emitted non-JSON text; surface it verbatim.
            return {"raw_output": reply}
    except Exception as exc:
        return {"error": str(exc)}
# ── 5) Gradio interface with API endpoints ───────────────────────────────────────
# Two tabs: a human-facing inference form, and a tab that exercises the
# dict-shaped API payload path via predict_api_wrapper.
with gr.Blocks(title="Portfolio-Optimizer Inference") as iface:
    gr.Markdown("# Portfolio-Optimizer Inference")
    gr.Markdown("Paste your snapshot JSON and get back a single-JSON allocation verdict.")
    with gr.Tab("Inference"):
        # Direct path: the textbox value is the snapshot JSON passed to infer().
        input_text = gr.Textbox(
            label="Snapshot data (JSON)",
            lines=15,
            value='{"ticker": "COIN", "snapshot": "2022-06-18", "previous_allocation_pct": 0.05}'
        )
        output_json = gr.JSON(label="Model output")
        predict_btn = gr.Button("Predict")
        predict_btn.click(fn=infer, inputs=input_text, outputs=output_json)
    with gr.Tab("API Testing"):
        gr.Markdown("## API Testing Interface")
        gr.Markdown("Use this to test the API functionality. The main API endpoint is available at `/api/predict/`")
        # Wrapped path: the textbox holds a {"data": [...]} envelope with the
        # snapshot JSON escaped inside it.
        api_input = gr.Textbox(
            label="API Request Data (JSON)",
            lines=10,
            value='{"data": ["{\\"ticker\\": \\"AAPL\\", \\"snapshot\\": \\"2025-01-01\\", \\"previous_allocation_pct\\": 0.05}"]}'
        )
        api_output = gr.JSON(label="API Response")
        api_btn = gr.Button("Test API")
        api_btn.click(fn=predict_api_wrapper, inputs=api_input, outputs=api_output)
# ── 6) Launch for Hugging Face Spaces ───────────────────────────────────────────
if __name__ == "__main__":
    # For Hugging Face Spaces, use the default launch
    # The API endpoints will be available at /api/predict/ automatically
    iface.launch()