Spaces:
Sleeping
Sleeping
| # TODO: create a csv parser | |
| from __future__ import annotations | |
| from ast import Lambda | |
| import contextlib | |
| import csv | |
| from pathlib import Path | |
| from typing import TYPE_CHECKING, Callable | |
| import yaml | |
| if TYPE_CHECKING: | |
| from io import TextIOWrapper | |
class CsvParser:
    """Convert a directory of section CSV files into entries of ``config.yaml``.

    Each CSV file describes one section. The first cell of its first row names
    the section (for example ``"Ruleset ID"``); every following row is stored
    as ``{first_cell: remaining_cells}``. ``csv_parser`` turns the collected
    sections into the ``rulesets``, ``structures``, ``states``, ``prompts`` and
    ``events`` keys of the YAML configuration and writes the file back.
    """

    def __init__(self, directory: str) -> None:
        """Locate ``config.yaml`` and the CSV files below ``<cwd>/<directory>``.

        Args:
            directory: Directory (relative to the current working directory)
                containing ``config.yaml`` and a ``csv_files`` sub-directory.
        """
        base_directory = Path.cwd() / directory
        self.yaml_path = base_directory / "config.yaml"
        self.csv_directory = base_directory / "csv_files"
        # Sorted so the processing order does not depend on the order in which
        # the file system happens to list the directory.
        self.csv_file_paths = sorted(
            path for path in self.csv_directory.glob("*") if path.is_file()
        )

    @staticmethod
    def _key_and_cells(row: dict, width: int) -> tuple:
        """Return the ``(key, cells)`` pair of a single-entry *row*.

        The row is not modified. ``cells`` is a new list padded with empty
        strings to at least *width* items so callers can index fixed columns
        without raising ``IndexError`` on short CSV rows. An empty *row*
        yields an empty key, which every caller treats as "skip this row".
        """
        if not row:
            return "", [""] * width
        key, cells = next(iter(row.items()))
        padded = list(cells)
        padded.extend([""] * (width - len(padded)))
        return key, padded

    def csv_parser(self) -> None:
        """Read every CSV file, rebuild the YAML sections and save the config.

        A section whose CSV is absent is left untouched in the YAML data and a
        short notice is printed instead.
        """
        sections: dict = {}
        for csv_path in self.csv_file_paths:
            with csv_path.open("r", newline="") as handle:
                self.split_csv(handle, sections)

        # safe_load returns None for an empty document; start from an empty
        # mapping in that case so the assignments below still work.
        yaml_data = yaml.safe_load(self.yaml_path.read_text()) or {}

        # Presence is tested explicitly (instead of catching KeyError around
        # the whole call) so an error raised while building a section is not
        # mistaken for "section missing".
        if "Ruleset ID" in sections:
            yaml_data["rulesets"] = self.csv_rulesets(sections["Ruleset ID"])
        else:
            print("No rulesets")

        if "Agent ID" in sections:
            yaml_data["structures"] = self.csv_agents(sections["Agent ID"])
        else:
            print("No structures")

        # Tailoring rows are optional and only affect the states section.
        if "State ID" in sections:
            yaml_data["states"] = self.csv_states(
                sections["State ID"],
                sections.get("State ID to Tailor", []),
            )
        else:
            print(" no states")

        if "Prompt ID" in sections:
            yaml_data["prompts"] = self.csv_prompts(sections["Prompt ID"])
        else:
            print("no prompts")

        if "Transition ID" in sections:
            yaml_data["events"] = self.csv_transition_id(sections["Transition ID"])
        else:
            print("No transitions")

        self.update_and_save(yaml_data)

    def split_csv(self, csv_file: TextIOWrapper, all_information: dict) -> None:
        """Store the rows of one CSV file in *all_information* under its section.

        The section name is the first cell of the first row. Each later row is
        appended as ``{first_cell: remaining_cells}``. Blank lines and rows
        whose first cell is empty are skipped. A file with no header row adds
        nothing.

        Args:
            csv_file: Open text stream positioned at the start of the CSV data.
            all_information: Mapping updated in place; an existing entry for
                the same section name is replaced.
        """
        reader = csv.reader(csv_file)
        header_row = next(reader, None)
        if not header_row:
            return
        # A UTF-8 byte-order mark decoded as text would otherwise become part
        # of the section name and keep it from matching the expected header.
        section = header_row[0].lstrip("\ufeff")

        rows = []
        for row in reader:
            # csv.reader yields [] for blank lines; an empty first cell means
            # the row carries no identifier. The ",," comparison is retained
            # from the previous implementation.
            if not row or not row[0] or row[0] == ",,":
                continue
            rows.append({row[0]: row[1:]})
        all_information[section] = rows

    def csv_kbs(self, kb_info: list) -> dict:
        """Map each knowledge-base id to its ``file_path`` and ``file_type``.

        Rows lacking an id, a path (column 0) or a type (column 1) are skipped.
        """
        knowledge_bases = {}
        for row in kb_info:
            key, cells = self._key_and_cells(row, 2)
            file_path, file_type = cells[0], cells[1]
            if key and file_path and file_type:
                knowledge_bases[key] = {"file_path": file_path, "file_type": file_type}
        return knowledge_bases

    def csv_rulesets(self, ruleset_info: list) -> dict:
        """Map each ruleset id to its ``name`` (column 0) and ``rules`` list.

        Column 1 holds one rule per line; surrounding whitespace and double
        quotes and any leading ``-``/space characters are removed from each
        rule, and empty lines are ignored. Rows lacking an id, a name or any
        rule text are skipped.
        """
        rulesets = {}
        for row in ruleset_info:
            key, cells = self._key_and_cells(row, 2)
            name, rule_text = cells[0], cells[1]
            if key and name and rule_text:
                rules = [
                    line.strip().strip('"').lstrip("- ")
                    for line in rule_text.split("\n")
                    if line.strip()
                ]
                rulesets[key] = {"name": name, "rules": rules}
        return rulesets

    def csv_prompts(self, prompt_info: list) -> dict:
        """Map each prompt id to its ``prompt`` text (column 0).

        ``author_intent`` is added when column 1 is non-empty. Rows lacking an
        id or prompt text are skipped.
        """
        prompts = {}
        for row in prompt_info:
            key, cells = self._key_and_cells(row, 2)
            prompt_text, author_intent = cells[0], cells[1]
            if key and prompt_text:
                prompts[key] = {"prompt": prompt_text}
                if author_intent:
                    prompts[key]["author_intent"] = author_intent
        return prompts

    def csv_agents(self, agent_info: list) -> dict:
        """Map each agent id to its structure configuration.

        Columns read: 0 = comma-separated ruleset ids, 1 = vector store,
        2 = prompt id, 4 = model override (column 3 is not used). The model
        defaults to ``"gpt-4o"``. Rows lacking an id are skipped.
        """
        agents = {}
        for row in agent_info:
            key, cells = self._key_and_cells(row, 5)
            if not key:
                continue
            ruleset_ids = []
            if cells[0]:
                ruleset_ids = [rule_id.strip() for rule_id in cells[0].split(",")]
            config = {
                "model": "gpt-4o",
                "ruleset_ids": ruleset_ids,
            }
            if cells[1]:
                config["vector_stores"] = [cells[1]]
            # Can be overridden per state through the tailoring section.
            if cells[2]:
                config["prompt_id"] = cells[2]
            if cells[4]:
                config["model"] = cells[4]
            agents[key] = config
        return agents

    def csv_states(self, state_info: list, tailor_info: list) -> dict:
        """Build the ``states`` section, applying per-state tailoring.

        Args:
            state_info: State rows; column 0 is a comma-separated list of
                structure names (or ``"none"``). The ids ``"start"`` and
                ``"end"`` are flagged ``initial`` and ``final`` respectively.
            tailor_info: Tailoring rows keyed by state id; column 0 is the
                structure name, column 1 a comma-separated list of ruleset
                ids, column 2 a prompt id.

        Returns:
            Mapping of state id to its configuration.
        """
        states: dict = {}
        for row in state_info:
            key, cells = self._key_and_cells(row, 1)
            if not key:
                continue
            if key == "start":
                states[key] = {"initial": True}
            elif key == "end":
                states[key] = {"final": True}
            else:
                states[key] = {}
            structure_names = cells[0]
            if structure_names and structure_names != "none":
                states[key]["structures"] = {
                    name.strip(): {} for name in structure_names.split(",")
                }

        for row in tailor_info:
            state_id, cells = self._key_and_cells(row, 3)
            structure_name, ruleset_cell, prompt_id = cells[0], cells[1], cells[2]
            # A tailoring row needs both a state and a structure to apply to.
            if not state_id or not structure_name:
                continue
            # Update the existing state entry in place so flags such as
            # "initial"/"final" survive tailoring.
            structures = states.setdefault(state_id, {}).setdefault("structures", {})
            ruleset_ids = [
                item.strip() for item in ruleset_cell.split(",") if item.strip()
            ]
            if ruleset_ids:
                structures[structure_name] = {"ruleset_ids": ruleset_ids}
            else:
                # Make sure the entry exists so a prompt-only row is kept.
                structures.setdefault(structure_name, {})
            if prompt_id:
                structures[structure_name]["prompt_id"] = prompt_id
        return states

    def csv_transition_id(self, transition_info: list) -> dict:
        """Group transitions by event id.

        Each row contributes ``{"from": column 0, "to": column 1}`` to the
        ``transitions`` list of its event. Rows lacking an id, a source or a
        target are skipped.
        """
        events: dict = {}
        for row in transition_info:
            key, cells = self._key_and_cells(row, 2)
            source, target = cells[0], cells[1]
            if key and source and target:
                event = events.setdefault(key, {"transitions": []})
                event["transitions"].append({"from": source, "to": target})
        return events

    def update_and_save(self, config: dict) -> None:
        """Write *config* to ``self.yaml_path`` as block-style YAML."""
        with self.yaml_path.open("w") as file:
            yaml.dump(config, file, default_flow_style=False, line_break="\n")
if __name__ == "__main__":
    # Script entry point: convert the CSV files under "uw_programmatic".
    parser = CsvParser("uw_programmatic")
    parser.csv_parser()