Spaces:
Build error
Build error
| """ | |
| Utility functions for parsing text-based dataset files for LLM response comparator. | |
| """ | |
| import re | |
| from pathlib import Path | |
# Section names recognised in a dataset file, in their documented order.
_SECTION_NAMES = ("prompt", "response1", "model1", "response2", "model2")

# Matches any section marker, e.g. ``\prompt=`` or ``\model2=``.
_SECTION_MARKER = re.compile(r'\\(' + '|'.join(_SECTION_NAMES) + r')=')


def parse_text_file(file_path):
    r"""
    Parse a text file to extract prompt, response1, model1, response2, and model2.

    Format:
    - \prompt= followed by the prompt text
    - \response1= followed by the first model's response
    - \model1= followed by the first model's name
    - \response2= followed by the second model's response
    - \model2= followed by the second model's name

    A section's text runs from its marker up to the next marker of any kind
    (or the end of the file), so a missing section never causes the
    preceding one to absorb the sections that follow it. If a marker occurs
    more than once, only its first occurrence is used.

    Args:
        file_path (str): Path to the text file (read as UTF-8).

    Returns:
        dict: Dictionary with the keys prompt, response1, model1, response2,
            and model2. Values are stripped of surrounding whitespace;
            sections absent from the file map to "".

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    sections = dict.fromkeys(_SECTION_NAMES, "")
    seen = set()
    hits = list(_SECTION_MARKER.finditer(content))
    for index, hit in enumerate(hits):
        name = hit.group(1)
        if name in seen:
            # Keep the first occurrence; a repeated marker still terminates
            # the section before it, but does not overwrite the stored value.
            continue
        seen.add(name)
        is_last = index + 1 == len(hits)
        stop = len(content) if is_last else hits[index + 1].start()
        sections[name] = content[hit.end():stop].strip()
    return sections
def load_text_file(file_path):
    """
    Read one text file and return it as a single dataset entry.

    This delegates directly to parse_text_file and returns its result
    unchanged.

    Args:
        file_path (str): Path to the text file.

    Returns:
        dict: Dataset entry with prompt, response1, model1, response2, and model2.
    """
    entry = parse_text_file(file_path)
    return entry
def load_builtin_datasets(directory_path):
    """
    Load all built-in datasets from a directory.

    Only regular files matching ``*.txt`` directly inside the directory are
    loaded (no recursion). Files are processed in sorted path order so the
    returned list is the same on every run and platform; glob order alone
    depends on the filesystem.

    Args:
        directory_path (str): Path to the directory containing text files.

    Returns:
        list: List of dataset entries, one per text file, as produced by
            parse_text_file. Empty if no matching files are found.
    """
    directory = Path(directory_path)
    # Skip anything that merely looks like a text file (e.g. a directory
    # named "foo.txt"), which would otherwise fail when opened.
    text_files = sorted(
        candidate for candidate in directory.glob('*.txt') if candidate.is_file()
    )
    return [parse_text_file(str(text_file)) for text_file in text_files]