Spaces:

OnyxMunk
/

GravityFalls

Paused

GravityFalls / python /helpers /extract_tools.py

frdel

docker files, agent number fix, json misformat message

c425c40 over 1 year ago

6.78 kB

	import re, os
	from typing import Any
	from . import files
	# import dirtyjson
	from .dirty_json import DirtyJson
	import regex


	def json_parse_dirty(json:str) -> dict[str,Any] \| None:
	ext_json = extract_json_object_string(json)
	if ext_json:
	# ext_json = fix_json_string(ext_json)
	data = DirtyJson.parse_string(ext_json)
	if isinstance(data,dict): return data
	return None

	def extract_json_object_string(content):
	start = content.find('{')
	if start == -1:
	return ""

	# Find the first '{'
	end = content.rfind('}')
	if end == -1:
	# If there's no closing '}', return from start to the end
	return content[start:]
	else:
	# If there's a closing '}', return the substring from start to end
	return content[start:end+1]

	def extract_json_string(content):
	# Regular expression pattern to match a JSON object
	pattern = r'\{(?:[^{}]\|(?R))\}\|\[(?:[^\[\]]\|(?R))\]\|"(?:\\.\|[^"\\])*"\|true\|false\|null\|-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?'

	# Search for the pattern in the content
	match = regex.search(pattern, content)

	if match:
	# Return the matched JSON string
	return match.group(0)
	else:
	print("No JSON content found.")
	return ""

	def fix_json_string(json_string):
	# Function to replace unescaped line breaks within JSON string values
	def replace_unescaped_newlines(match):
	return match.group(0).replace('\n', '\\n')

	# Use regex to find string values and apply the replacement function
	fixed_string = re.sub(r'(?<=: ")(.*?)(?=")', replace_unescaped_newlines, json_string, flags=re.DOTALL)
	return fixed_string

	# def extract_tool_requests2(response):
	# # Regex to match the tags ending with $, allowing for varying whitespace
	# pattern = r'<(\w+)\$[\s](.?)>([\s\S]*?)(?=<\w+\$\|<\/\1\$\|$)'
	# matches = re.findall(pattern, response, re.DOTALL)

	# tool_usages = []
	# allowed_tags = list_python_files("tools")

	# for match in matches:
	# tag_name, attributes, content = match

	# if tag_name not in allowed_tags: continue

	# tool_dict = {}
	# tool_dict['name'] = tag_name
	# tool_dict['args'] = {}

	# # Parse attributes
	# for attr in re.findall(r'(\w+)\s=\s"([^"]+)"', attributes):
	# tool_dict['args'][attr[0]] = attr[1]

	# # Add body content
	# tool_dict["content"] = content.strip()
	# tool_dict["index"] = len(tool_usages)
	# tool_usages.append(tool_dict)

	# return tool_usages

	# def extract_tool_requests(response):
	# # Regex to match the tool blocks, allowing for varying whitespace
	# pattern = r'<tool\$[\s](.?)>(.?)<\/tool\$\s>'
	# matches = re.findall(pattern, response, re.DOTALL)

	# tool_usages = []

	# for match in matches:
	# attributes, body = match
	# tool_dict = {}
	# # Parse attributes
	# for attr in re.findall(r'(\w+)\s=\s"([^"]+)"', attributes):
	# tool_dict[attr[0]] = attr[1]
	# # Add body content
	# tool_dict["body"] = body.strip()
	# tool_usages.append(tool_dict)

	# return tool_usages

	# def extract_specified_tags(response):

	# allowed_tags = list_python_files("tools")

	# # Create a regex pattern to match specified tags and their attributes
	# pattern = r'<({})([\s\S]*?)>'.format('\|'.join(allowed_tags))
	# matches = re.findall(pattern, response, re.DOTALL)

	# extracted_tags = []

	# for match in matches:
	# tag_name, attributes = match
	# tag_dict = {}
	# tag_dict['name'] = tag_name

	# # Parse attributes
	# for attr in re.findall(r'(\w+)\s=\s"([^"]+)"', attributes):
	# tag_dict[attr[0]] = attr[1]

	# # Extract the body text (everything after the tag until the next tag or end of string)
	# body_pattern = r'<{0}[\s\S]?>([\s\S]?)(?=<\|$)'.format(tag_name)
	# body_match = re.search(body_pattern, response, re.DOTALL)
	# tag_dict['body'] = body_match.group(1).strip() if body_match else ''

	# extracted_tags.append(tag_dict)

	# return extracted_tags

	# def list_python_files(directory):
	# # List all files in the given directory
	# list = os.listdir(files.get_abs_path(directory))
	# # Filter for Python files and remove the extension
	# python_files = { os.path.splitext(file)[0] for file in list if file.endswith('.py') }
	# return python_files

	# import re
	# from xml.etree import ElementTree as ET

	# def extract_tool_usages_advanced(response):
	# tool_usages = []
	# pattern = re.compile(r'<tool.*?>', re.DOTALL)

	# start_pos = 0
	# while start_pos < len(response):
	# match = pattern.search(response, start_pos)
	# if not match:
	# break

	# tag_start = match.start()
	# tag_end = match.end()
	# end_tag = '</tool>'

	# # To find the corresponding end tag correctly handling nested tags
	# depth = 1
	# search_pos = tag_end

	# while depth > 0:
	# next_open = response.find('<tool', search_pos)
	# next_close = response.find(end_tag, search_pos)

	# if next_close == -1:
	# break

	# if next_open != -1 and next_open < next_close:
	# depth += 1
	# search_pos = next_open + len('<tool')
	# else:
	# depth -= 1
	# search_pos = next_close + len(end_tag)

	# end_tag_end = search_pos

	# # Extract the whole tool block
	# tool_block = response[tag_start:end_tag_end]

	# try:
	# element = ET.fromstring(tool_block)
	# tool_dict = element.attrib
	# tool_dict["body"] = ET.tostring(element, encoding='unicode', method='xml').split('>', 1)[1].rsplit('<', 1)[0].strip()
	# tool_usages.append(tool_dict)
	# except ET.ParseError:
	# # In case of parsing error, fall back to including entire content between the tags
	# body_content = response[tag_end:end_tag_end - len(end_tag)].strip()
	# tool_dict = {"name": re.search(r'name="(.*?)"', match.group(0)).group(1), "body": body_content}
	# tool_usages.append(tool_dict)

	# start_pos = end_tag_end

	# return tool_usages

	# # Example usage with the given input
	# response = """
	# <tool name="code_execution_tool">
	# #comment <tool<tool name="abc><tool><loot><tool>"
	# print(text)
	# </tool>
	# """

	# tool_usages = extract_tool_usages(response)
	# print(tool_usages)