AutoPage

Runtime error

App Files Files Community

AutoPage / camel /messages /conversion /alpaca.py

Mqleet

upd code

fcaa164 2 months ago

raw

history blame

4.16 kB

	# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========

	import re

	from pydantic import BaseModel, Field, field_validator


	class AlpacaItem(BaseModel):
	    r"""Represents an instruction-response item in the Alpaca format.

	    Appropriate for both cases where the input field is empty, or populated.
	    Provides parsing from string format using the class method
	    from_string(), and serialization back to that format using to_string().

	    Args:
	        instruction (str): The instruction/question/prompt
	        input (str): Input context or examples (put empty string if none)
	        output (str): The response/answer to the instruction
	    """

	    instruction: str = Field(description="The instruction/question/prompt")
	    input: str = Field(
	        description="Optional context or input for the task."
	        " For example, when the instruction is \"Summarize the "
	        "following article\", the input is the article."
	    )
	    output: str = Field(description="The response/answer to the instruction")

	    @field_validator('instruction', 'output')
	    @classmethod
	    def no_section_markers(cls, value: str) -> str:
	        r"""Ensures fields don't contain section markers like '###
	        Response:'

	        Args:
	            value (str): The raw field value being validated.

	        Returns:
	            str: The value with leading/trailing whitespace removed.

	        Raises:
	            ValueError: If the value contains any of the section markers.
	        """
	        # A marker inside a field would be read as a section boundary when
	        # the serialized item is parsed back with from_string().
	        markers = ('### Response', '### Instruction', '### Input')
	        if any(marker in value for marker in markers):
	            raise ValueError("Field cannot contain section markers")
	        return value.strip()

	    @classmethod
	    def from_string(cls, text: str) -> "AlpacaItem":
	        r"""Creates an AlpacaItem from a formatted string.

	        Args:
	            text: String in either of these formats:
	                With input:
	                ### Instruction:
	                {instruction}
	                ### Input:
	                {input}
	                ### Response:
	                {response}

	                Without input:
	                ### Instruction:
	                {instruction}
	                ### Response:
	                {response}

	                An empty '### Input:' section, as produced by to_string()
	                for an item without input, is parsed as an empty input.

	        Returns:
	            AlpacaItem: Parsed instance

	        Raises:
	            ValueError: text doesn't match expected format, or the
	                instruction or response section is missing or empty
	        """
	        # Strip and standardize newlines
	        text = text.strip().replace('\r\n', '\n')

	        sections = {}
	        for name in ('Instruction', 'Input', 'Response'):
	            # The header must be followed only by blanks up to the end of
	            # its line. The newline itself is left unconsumed so that an
	            # empty section (header directly followed by the next header)
	            # still ends at the '\n###' lookahead instead of swallowing the
	            # following section. The body runs lazily up to the next line
	            # starting with '###' or the end of the text.
	            match = re.search(
	                rf'###\s*{name}:[^\S\n]*(?=\n|\Z)(.*?)(?=\n###|\Z)',
	                text,
	                re.DOTALL,
	            )
	            if match:
	                sections[name] = match.group(1).strip()

	        if 'Instruction' not in sections or 'Response' not in sections:
	            raise ValueError(
	                "Text must contain '### Instruction:'"
	                " and '### Response:' sections"
	            )
	        if not sections['Instruction'] or not sections['Response']:
	            raise ValueError(
	                "'### Instruction:' and '### Response:'"
	                " sections cannot be empty"
	            )

	        return cls(
	            instruction=sections['Instruction'],
	            # The input section is optional; default to an empty string.
	            input=sections.get('Input', ""),
	            output=sections['Response'],
	        )

	    def to_string(self) -> str:
	        r"""Converts the AlpacaItem to its string representation.

	        The '### Input:' section is always emitted, even when the input is
	        an empty string; from_string() parses such a section back as an
	        empty input.

	        Returns:
	            str: Formatted string representation with sections markers
	        """
	        return "\n".join(
	            [
	                "### Instruction:",
	                self.instruction,
	                "",
	                "### Input:",
	                self.input,
	                "",
	                "### Response:",
	                self.output,
	            ]
	        )