Spaces:

triflix
/

brainfuncall

Running

App Files Files Community

brainfuncall / main.py

triflix

Update main.py

b3f0838 verified 7 days ago

raw

history blame contribute delete

3.82 kB

	from fastapi import FastAPI, HTTPException
	from pydantic import BaseModel, Field
	from typing import List, Dict, Any
	import os
	import datetime

	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM
	from huggingface_hub import login

	# ==========================================
	# 1. APP SETUP
	# ==========================================
	app = FastAPI(title="FunctionGemma Brain API", version="1.0.0")

	MODEL_ID = "google/functiongemma-270m-it"
	tokenizer = None
	model = None

	# ==========================================
	# 2. FEW-SHOT EXAMPLES (The Teacher)
	# ==========================================
	# We teach the model the correct tool names here.
	# This list simulates a previous conversation so the model knows what to do.
	FEW_SHOT_MESSAGES = [
	# Example 1: Counting/Stats
	{"role": "user", "content": "How many regions are there?"},
	{"role": "model", "content": "<start_function_call>call:get_aggregate_stats{target_entity:revenue_region}<end_function_call>"},

	# Example 2: Specific Search
	{"role": "user", "content": "What is the water level in Aadale dam?"},
	{"role": "model", "content": "<start_function_call>call:search_specific_dam{dam_name:Aadale}<end_function_call>"},

	# Example 3: Filtering
	{"role": "user", "content": "Show me Major dams in Pune."},
	{"role": "model", "content": "<start_function_call>call:filter_dams{district:Pune,project_type:Major}<end_function_call>"},

	# Example 4: Irrelevant Question (Teach it to NOT call functions for random stuff)
	{"role": "user", "content": "What is the capital of France?"},
	{"role": "model", "content": "I cannot answer that as it is not related to the dam database."}
	]

	# ==========================================
	# 3. STARTUP
	# ==========================================
	@app.on_event("startup")
	async def startup():
	global tokenizer, model
	hf_token = os.getenv("HF_TOKEN")
	if not hf_token: raise RuntimeError("HF_TOKEN missing")
	login(token=hf_token)

	print(f"🧠 Loading {MODEL_ID}...")
	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
	model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="cpu", torch_dtype=torch.float32)
	print("✅ Model Loaded.")

	# ==========================================
	# 4. API ENDPOINT
	# ==========================================
	class ChatRequest(BaseModel):
	query: str
	tools: List[Dict[str, Any]]
	include_date: bool = True

	@app.post("/generate")
	async def generate_function_call(request: ChatRequest):
	if not model: raise HTTPException(status_code=503, detail="Model loading")

	try:
	# 1. System Prompt
	system_content = "You are a model that can do function calling with the following functions."
	if request.include_date:
	today = datetime.date.today().isoformat()
	system_content += f" Today is {today}."

	# 2. Construct History: System -> Examples -> Current User Query
	messages = [{"role": "system", "content": system_content}]

	# Inject the examples!
	messages.extend(FEW_SHOT_MESSAGES)

	# Add the actual user query
	messages.append({"role": "user", "content": request.query})

	# 3. Tokenize
	inputs = tokenizer.apply_chat_template(
	messages,
	tools=request.tools,
	add_generation_prompt=True,
	return_dict=True,
	return_tensors="pt",
	)

	# 4. Generate
	outputs = model.generate(**inputs, max_new_tokens=128, do_sample=False)
	generated_text = tokenizer.decode(outputs[0][len(inputs["input_ids"][0]):], skip_special_tokens=True)

	return {"response": generated_text}

	except Exception as e:
	raise HTTPException(status_code=500, detail=str(e))