Spaces:

mo35
/

gemma4-quantfin

Sleeping

App Files Files Community

mo35 committed on Apr 23

Commit

cfde10f

0 Parent(s):

Add Gradio chat interface

Browse files

Files changed (2) hide show

app.py +84 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,84 @@

+"""
+HuggingFace Space — Gemma 4 Quantitative Finance Chat
+Hardware: Nvidia T4 medium (16 GB VRAM)
+"""
+import os
+import torch
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+from peft import PeftModel
+HF_TOKEN    = os.environ.get("HF_TOKEN", "")
+HF_USERNAME = os.environ.get("HF_USERNAME", "mo35")
+BASE_MODEL  = "google/gemma-4-E4B-it"
+LORA_REPO   = f"{HF_USERNAME}/gemma4-quantfin-lora"
+# ── Load model at startup ─────────────────────────────────────────────────────
+print("Loading tokenizer...")
+tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, token=HF_TOKEN)
+print("Loading base model in 4-bit...")
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit              = True,
+    bnb_4bit_compute_dtype    = torch.bfloat16,
+    bnb_4bit_use_double_quant = True,
+    bnb_4bit_quant_type       = "nf4",
+)
+base_model = AutoModelForCausalLM.from_pretrained(
+    BASE_MODEL,
+    quantization_config = bnb_config,
+    device_map          = "auto",
+    token               = HF_TOKEN,
+)
+print(f"Loading LoRA adapter from {LORA_REPO}...")
+model = PeftModel.from_pretrained(base_model, LORA_REPO, token=HF_TOKEN)
+model.eval()
+print("Model ready.")
+# ── Inference ─────────────────────────────────────────────────────────────────
+def respond(message: str, history: list) -> str:
+    messages = [{"role": "user", "content": message}]
+    inputs = tokenizer.apply_chat_template(
+        messages,
+        add_generation_prompt = True,
+        return_tensors        = "pt",
+    ).to(model.device)
+    with torch.no_grad():
+        outputs = model.generate(
+            inputs,
+            max_new_tokens     = 1024,
+            temperature        = 0.7,
+            do_sample          = True,
+            repetition_penalty = 1.1,
+        )
+    return tokenizer.decode(
+        outputs[0][inputs.shape[-1]:],
+        skip_special_tokens = True,
+    )
+# ── Gradio UI ─────────────────────────────────────────────────────────────────
+demo = gr.ChatInterface(
+    fn          = respond,
+    title       = "Gemma 4 — Quantitative Finance",
+    description = (
+        "A specialized AI assistant fine-tuned on quantitative finance: derivatives pricing, "
+        "stochastic calculus, risk models, and portfolio theory. "
+        "Answers include LaTeX mathematical derivations."
+    ),
+    examples    = [
+        "Derive the Black-Scholes PDE from first principles.",
+        "Explain the SABR model and its implied volatility approximation.",
+        "What is the difference between risk-neutral and real-world measures?",
+        "Derive the Heston model characteristic function.",
+        "Explain Value at Risk vs Expected Shortfall.",
+    ],
+    theme          = gr.themes.Soft(),
+    cache_examples = False,
+)
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio>=4.0.0
+transformers>=4.49.0
+peft>=0.13.0
+bitsandbytes>=0.43.0
+accelerate>=0.26.0
+torch>=2.1.0