LLM / app.py
ddededstger's picture
Update app.py
ace15b2 verified
raw
history blame
2.53 kB
import gradio as gr
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
from huggingface_hub import login, snapshot_download
import torch
import os
import json
# Authenticate with the Hugging Face Hub using the Space secret (secure, no hardcode).
# The token is read once and reused for every gated download below; a missing
# secret surfaces immediately as a KeyError at startup.
token = os.environ["HF_TOKEN"]
login(token)

# Model setup (loads once on Space startup).
model_id = "agarkovv/CryptoTrader-LM"  # PEFT adapter repository
base_model_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # repository the tokenizer is loaded from
MAX_LENGTH = 32768  # token cap used for prompt truncation and for max_new_tokens
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # Use GPU if available (ZeroGPU on HF)

# Workaround: download the adapter files and drop the 'model_type' key from
# adapter_config.json when it is present.
local_dir = snapshot_download(repo_id=model_id)
config_path = os.path.join(local_dir, "adapter_config.json")
with open(config_path, "r", encoding="utf-8") as f:
    config = json.load(f)
if "model_type" in config:
    del config["model_type"]
    # Rewrite the file only when something actually changed, so an already
    # patched download is left untouched on later restarts.
    with open(config_path, "w", encoding="utf-8") as f:
        json.dump(config, f)

# Load the adapter from the patched local directory, passing the token for the
# gated base model.
# NOTE(review): the tokenizer is loaded from base_model_id, while the PEFT loader
# presumably resolves the base model from the adapter config - confirm that both
# refer to the same model family.
model = AutoPeftModelForCausalLM.from_pretrained(local_dir, token=token)
tokenizer = AutoTokenizer.from_pretrained(base_model_id, token=token)
model = model.to(DEVICE)
model.eval()  # inference only
def predict_trading_decision(prompt: str) -> str:
    """Predict daily trading decision (buy, sell, or hold) for BTC or ETH based on news and historical prices.

    Args:
        prompt: Cryptocurrency news and historical price data. May be plain text or already wrapped as [INST]YOUR PROMPT HERE[/INST]; the tags are added only when missing.

    Returns:
        Generated trading decision as text (e.g., 'Buy BTC at $62k'), without the echoed input prompt.
    """
    # Callers that follow the documented [INST]...[/INST] format would otherwise
    # end up with doubly nested tags, so wrap only when the tags are absent.
    stripped = prompt.strip() if isinstance(prompt, str) else ""
    if stripped.startswith("[INST]") and stripped.endswith("[/INST]"):
        formatted_prompt = stripped
    else:
        formatted_prompt = f"[INST]{prompt}[/INST]"

    inputs = tokenizer(
        formatted_prompt,
        return_tensors="pt",
        padding=False,
        max_length=MAX_LENGTH,
        truncation=True,
    )
    inputs = {key: value.to(model.device) for key, value in inputs.items()}
    # Remember how many tokens belong to the prompt so they can be dropped
    # from the decoded result.
    prompt_length = inputs["input_ids"].shape[-1]

    res = model.generate(
        **inputs,
        use_cache=True,
        max_new_tokens=MAX_LENGTH,
    )
    # For a causal LM the generated sequence starts with the prompt tokens;
    # decode only what follows them so the result is the decision itself.
    output = tokenizer.decode(res[0][prompt_length:], skip_special_tokens=True)
    return output.strip()
# Gradio UI: a single free-text prompt goes in, a single text decision comes out.
prompt_box = gr.Textbox(label="Input Prompt (News + Prices)")
decision_box = gr.Textbox(label="Trading Decision")

demo = gr.Interface(
    fn=predict_trading_decision,
    inputs=prompt_box,
    outputs=decision_box,
    title="CryptoTrader-LM with Llama MCP Tool",
    description="Predict buy/sell/hold for BTC/ETH using Llama 3 base.",
)

# Start the app with the MCP server option switched on.
demo.launch(mcp_server=True)