llama.cpp/examples/json_schema_pydantic_example.py

LLM-Maroc / llama.cpp /examples /json_schema_pydantic_example.py

Upload folder using huggingface_hub

305a42c verified 6 months ago

3.21 kB

	# Usage:
	#! ./llama-server -m some-model.gguf &
	#! pip install pydantic
	#! python json_schema_pydantic_example.py

	from pydantic import BaseModel, Field, TypeAdapter
	from annotated_types import MinLen
	from typing import Annotated, List, Optional
	import json, requests

	if True:

	    def create_completion(
	        *,
	        response_model=None,
	        endpoint="http://localhost:8080/v1/chat/completions",
	        messages,
	        **kwargs,
	    ):
	        """
	        Create a chat completion using an OpenAI-compatible endpoint w/ JSON schema support
	        (llama.cpp server, llama-cpp-python, Anyscale / Together...).

	        The response_model param takes a type (+ supports Pydantic) and behaves just as
	        w/ Instructor (see the alternative branch below).

	        All parameters are keyword-only.

	        Args:
	            response_model: Optional type to constrain the answer to. When given, its
	                JSON schema is prepended to the conversation as a system message and
	                also sent as `response_format`; the reply is validated against it.
	            endpoint: URL the request is POSTed to.
	            messages: List of chat message dicts ("role" / "content").
	            **kwargs: Extra fields forwarded verbatim in the JSON request body
	                (e.g. `model="..."`).

	        Returns:
	            The content of the first choice's message: validated into `response_model`
	            when one was given, otherwise the raw content as returned by the server.

	        Raises:
	            Exception: If the JSON response contains an 'error' entry; the message is
	                taken from `error.message`.
	        """
	        response_format = None
	        type_adapter = None

	        if response_model is not None:
	            type_adapter = TypeAdapter(response_model)
	            schema = type_adapter.json_schema()
	            # Build a new list rather than mutating the caller's `messages`.
	            messages = [{
	                "role": "system",
	                "content": f"You respond in JSON format with the following schema: {json.dumps(schema, indent=2)}"
	            }] + messages
	            response_format = {"type": "json_object", "schema": schema}

	        data = requests.post(
	            endpoint,
	            headers={"Content-Type": "application/json"},
	            json=dict(messages=messages, response_format=response_format, **kwargs),
	        ).json()
	        if 'error' in data:
	            raise Exception(data['error']['message'])

	        content = data["choices"][0]["message"]["content"]
	        return type_adapter.validate_json(content) if type_adapter else content

	else:

	    # This alternative branch uses Instructor + OpenAI client lib.
	    # Instructor supports streamed iterable responses, retry & more.
	    # (see https://python.useinstructor.com/)
	    #! pip install instructor openai
	    import instructor
	    import openai
	    client = instructor.patch(
	        openai.OpenAI(api_key="123", base_url="http://localhost:8080"),
	        mode=instructor.Mode.JSON_SCHEMA)
	    create_completion = client.chat.completions.create


	if __name__ == '__main__':

	    # Local import: ConfigDict is only needed by the example models below.
	    from pydantic import ConfigDict

	    class QAPair(BaseModel):
	        """A question with a concise answer, a justification and a 1..5 `stars` value."""
	        # extra='forbid' triggers additionalProperties: false in the JSON schema
	        model_config = ConfigDict(extra='forbid')
	        question: str
	        concise_answer: str
	        justification: str
	        stars: Annotated[int, Field(ge=1, le=5)]

	    class PyramidalSummary(BaseModel):
	        """A recursive summary node: title, summary, Q&A pairs and optional sub-sections."""
	        # extra='forbid' triggers additionalProperties: false in the JSON schema
	        model_config = ConfigDict(extra='forbid')
	        title: str
	        summary: str
	        # At least two Q&A pairs per section.
	        question_answers: Annotated[List[QAPair], MinLen(2)]
	        # Either None or at least two nested summaries (self-reference via forward ref).
	        sub_sections: Optional[Annotated[List['PyramidalSummary'], MinLen(2)]]

	    # Ask the model for a schema-constrained summary and print the validated result.
	    print("# Summary\n", create_completion(
	        model="...",
	        response_model=PyramidalSummary,
	        messages=[{
	            "role": "user",
	            "content": """
	You are a highly efficient corporate document summarizer.
	Create a pyramidal summary of an imaginary internal document about our company processes
	(starting high-level, going down to each sub sections).
	Keep questions short, and answers even shorter (trivia / quizz style).
	"""
	        }]))