| import requests |
| import json |
| import os |
| import argparse |
|
|
def query_endpoint(endpoint_url, api_token=None, prompt="Hello, how are you?",
                   system_message="You are a helpful assistant.",
                   max_tokens=256, temperature=0.7,
                   format_type="openai", timeout=60):
    """
    Query the Phi-4 Mini model at the specified HuggingFace Inference Endpoint.

    Args:
        endpoint_url: The URL of your HuggingFace Inference Endpoint
        api_token: Your HuggingFace API token (if needed)
        prompt: The user message to send to the model
        system_message: The system message to include
        max_tokens: Maximum number of tokens to generate
        temperature: Temperature for generation (0.0 to 1.0)
        format_type: Type of request format to use:
                     "openai" - Standard OpenAI format
                     "hf_wrapped" - HuggingFace format with OpenAI format wrapped in "inputs"
                     "simple" - Simple text input in "inputs" field
        timeout: Seconds to wait for the server before giving up (default 60)

    Returns:
        The parsed JSON response from the model, or None if the request failed.

    Raises:
        ValueError: If format_type is not one of the supported values.
    """

    headers = {
        "Content-Type": "application/json"
    }

    if api_token:
        headers["Authorization"] = f"Bearer {api_token}"

    # Both chat-style formats share the same message list; build it once.
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt}
    ]

    if format_type == "openai":
        payload = {
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature
        }
    elif format_type == "hf_wrapped":
        payload = {
            "inputs": {
                "messages": messages,
                "max_tokens": max_tokens,
                "temperature": temperature
            }
        }
    elif format_type == "simple":
        payload = {
            "inputs": prompt
        }
    else:
        raise ValueError(f"Invalid format type: {format_type}")

    try:
        print(f"Request payload: {json.dumps(payload, indent=2)}")
        # json= serializes the payload and sets Content-Type automatically;
        # timeout= prevents the call from hanging forever on a stalled server.
        response = requests.post(endpoint_url, headers=headers, json=payload,
                                 timeout=timeout)
        response.raise_for_status()

        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error making request: {e}")
        # Must compare against None: a requests.Response is falsy for 4xx/5xx
        # statuses (bool(resp) == resp.ok), so a bare truthiness check would
        # skip printing the error body in exactly the cases we care about.
        if getattr(e, 'response', None) is not None:
            print(f"Response content: {e.response.text}")
        return None
|
|
if __name__ == "__main__":
    # Command-line driver: gather endpoint/query options, send one request,
    # and print either the assistant's reply or the raw JSON response.
    cli = argparse.ArgumentParser(description="Query a Phi-4 Mini HuggingFace Inference Endpoint")
    cli.add_argument("--url", type=str, required=True, help="The endpoint URL")
    cli.add_argument("--token", type=str, default=os.environ.get("HF_API_TOKEN"), help="HuggingFace API token")
    cli.add_argument("--prompt", type=str, default="Explain quantum computing in simple terms.", help="User prompt")
    cli.add_argument("--system", type=str, default="You are a helpful assistant.", help="System message")
    cli.add_argument("--max_tokens", type=int, default=256, help="Maximum tokens to generate")
    cli.add_argument("--temperature", type=float, default=0.7, help="Temperature (0.0 to 1.0)")
    cli.add_argument("--format", type=str, default="openai",
                     choices=["openai", "hf_wrapped", "simple"],
                     help="Format to use for the request")

    opts = cli.parse_args()

    print(f"Querying endpoint: {opts.url}")
    print(f"Prompt: {opts.prompt}")
    print(f"Format: {opts.format}")

    result = query_endpoint(
        opts.url,
        api_token=opts.token,
        prompt=opts.prompt,
        system_message=opts.system,
        max_tokens=opts.max_tokens,
        temperature=opts.temperature,
        format_type=opts.format,
    )

    if result:
        print("\nResponse:")
        # An OpenAI-style reply carries the text under choices[0].message.content;
        # anything else is dumped verbatim for inspection.
        has_choice = "choices" in result and len(result["choices"]) > 0
        if has_choice:
            print(result["choices"][0]["message"]["content"])
        else:
            print(json.dumps(result, indent=2))
    else:
        print("Failed to get a valid response")