Spaces:
Runtime error
Runtime error
| # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= | |
| import re | |
| from pydantic import BaseModel, Field, field_validator | |
class AlpacaItem(BaseModel):
    r"""Represents an instruction-response item in the Alpaca format.

    Appropriate for both cases where the input field is empty or populated.
    Provides parsing from the string format through the class method
    :meth:`from_string` and serialization back through :meth:`to_string`.

    Args:
        instruction (str): The instruction/question/prompt
        input (str): Input context or examples (put empty string if none)
        output (str): The response/answer to the instruction
    """

    instruction: str = Field(description="The instruction/question/prompt")
    input: str = Field(
        description="Optional context or input for the task."
        " For example, when the instruction is \"Summarize the "
        "following article\", the input is the article."
    )
    output: str = Field(description="The response/answer to the instruction")

    # NOTE(review): `input` is not validated here so that free-form context
    # is accepted unchanged -- confirm whether it should be covered as well.
    @field_validator('instruction', 'output')
    @classmethod
    def no_section_markers(cls, value: str) -> str:
        r"""Ensures fields don't contain section markers like '###
        Response:'

        Args:
            value (str): The raw field value being validated.

        Returns:
            str: The value with leading/trailing whitespace removed.

        Raises:
            ValueError: If the value contains one of the section markers
                '### Response', '### Instruction' or '### Input'.
        """
        markers = ('### Response', '### Instruction', '### Input')
        if any(marker in value for marker in markers):
            raise ValueError("Field cannot contain section markers")
        return value.strip()

    @classmethod
    def from_string(cls, text: str) -> "AlpacaItem":
        r"""Creates an AlpacaItem from a formatted string.

        Args:
            text: String in either of these formats:
                With input:
                ### Instruction:
                {instruction}
                ### Input:
                {input}
                ### Response:
                {response}
                Without input:
                ### Instruction:
                {instruction}
                ### Response:
                {response}

        Returns:
            AlpacaItem: Parsed instance

        Raises:
            ValueError: text doesn't match expected format, or the
                instruction/response sections are missing or empty
        """
        # Strip and standardize newlines
        text = text.strip().replace('\r\n', '\n')

        def _section(name):
            r"""Returns the stripped body of section `name`, or None when
            its header is absent from the text."""
            # Only horizontal whitespace may follow the header on its own
            # line, and the body may be empty. Otherwise an empty section
            # (e.g. the empty "### Input:" emitted by to_string) would
            # swallow the next "### ..." header as its content.
            found = re.search(
                rf'###\s*{name}:[^\S\n]*\n(.*?)(?=^###|\Z)',
                text,
                re.DOTALL | re.MULTILINE,
            )
            return None if found is None else found.group(1).strip()

        instruction = _section('Instruction')
        input_text = _section('Input')
        response = _section('Response')

        if instruction is None or response is None:
            raise ValueError(
                "Text must contain '### Instruction:'"
                " and '### Response:' sections"
            )
        # The two mandatory sections must carry actual content; an empty
        # or missing Input section simply maps to an empty string.
        if not instruction or not response:
            raise ValueError(
                "'### Instruction:' and '### Response:' sections"
                " must not be empty"
            )

        return cls(
            instruction=instruction,
            input=input_text or "",
            output=response,
        )

    def to_string(self) -> str:
        r"""Converts the AlpacaItem to its string representation.

        The '### Input:' section is always emitted, even when the input
        is an empty string.

        Returns:
            str: Formatted string representation with sections markers
        """
        return "\n".join(
            [
                "### Instruction:",
                self.instruction,
                "",
                "### Input:",
                self.input,
                "",
                "### Response:",
                self.output,
            ]
        )