Spaces:
Running
Running
File size: 4,943 Bytes
"""Command parsing utilities for API optimizations."""
import re
import shlex
# A token counts as an environment assignment when it is NAME=value, where
# NAME starts with a letter or underscore and continues with letters, digits
# or underscores. The value part may be empty.
_ENV_ASSIGNMENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*=.*$")


def _is_env_assignment(part: str) -> bool:
    """Tell whether *part* looks like a shell ``NAME=value`` assignment.

    Args:
        part: A single token of an already-split command line.

    Returns:
        True when the token matches ``_ENV_ASSIGNMENT_RE``; False otherwise
        (for example option tokens such as ``--flag=value``, whose name does
        not start with a letter or underscore).
    """
    return _ENV_ASSIGNMENT_RE.match(part) is not None
def _strip_env_assignments(parts: list[str]) -> list[str]:
    """Drop the leading run of ``NAME=value`` tokens from a token list.

    Only assignments that appear before the first non-assignment token are
    removed; assignment-looking tokens later in the list are kept.

    Args:
        parts: Tokens of a command line, in order.

    Returns:
        A new list holding the tokens from the first non-assignment token
        onward. It is empty when every token is an assignment or when
        *parts* is empty.
    """
    skip = 0
    # Advance past every consecutive assignment at the front of the list.
    while skip < len(parts) and _is_env_assignment(parts[skip]):
        skip += 1
    return parts[skip:]
def extract_command_prefix(command: str) -> str:
    """Derive a short prefix that identifies which command is being run.

    The command line is tokenised with ``shlex.split(..., posix=False)`` and
    leading ``NAME=value`` environment assignments are set aside before the
    program name is inspected.

    Args:
        command: The raw shell command line.

    Returns:
        One of:

        * ``"command_injection_detected"`` when the text contains a backtick
          or ``$(`` anywhere.
        * ``"none"`` when there are no tokens, or only environment
          assignments.
        * ``"<tool> <subcommand>"`` (e.g. ``"git commit"``) when the program
          is one of git, npm, docker, kubectl, cargo, go, pip or yarn and the
          next token does not start with ``-``.
        * the bare tool name when one of those tools is followed by an option
          token (e.g. ``"git"`` for ``git --version``).
        * otherwise the program name, preceded by any leading environment
          assignments joined with single spaces (e.g. ``"FOO=1 ls"``).

        If tokenisation fails with ``ValueError`` the command is split on
        whitespace instead and only the program name (or ``"none"``) is
        returned.

    NOTE(review): leading environment assignments are included only in the
    last case above; the two tool-specific cases drop them. Confirm with
    callers whether that asymmetry is intended.
    """
    # Either marker means command substitution; refuse to classify the command.
    if any(marker in command for marker in ("`", "$(")):
        return "command_injection_detected"

    try:
        tokens = shlex.split(command, posix=False)
    except ValueError:
        # Tokenisation failed: degrade to a plain whitespace split and report
        # the program name only.
        remaining = _strip_env_assignments(command.split())
        return remaining[0] if remaining else "none"

    if not tokens:
        return "none"

    remaining = _strip_env_assignments(tokens)
    if not remaining:
        return "none"

    program = remaining[0]
    subcommand_tools = {"git", "npm", "docker", "kubectl", "cargo", "go", "pip", "yarn"}

    if program in subcommand_tools and len(remaining) > 1:
        subcommand = remaining[1]
        if subcommand.startswith("-"):
            return program
        return f"{program} {subcommand}"

    # Whatever was stripped from the front is the run of env assignments.
    assignments = tokens[: len(tokens) - len(remaining)]
    if assignments:
        return " ".join(assignments) + " " + program
    return program
# Options of the reading commands that consume the NEXT token as their value,
# so that value is not mistaken for a file path (e.g. the 5 in `head -n 5 f`).
_READER_VALUE_OPTIONS: dict[str, frozenset[str]] = {
    "head": frozenset({"-n", "-c", "--lines", "--bytes"}),
    "tail": frozenset(
        {
            "-n",
            "-c",
            "-s",
            "--lines",
            "--bytes",
            "--sleep-interval",
            "--pid",
            "--max-unchanged-stats",
        }
    ),
}

# grep short options that take a value, and the subset that supplies the pattern.
_GREP_SHORT_VALUE_FLAGS = frozenset("ABCDdefm")
_GREP_SHORT_PATTERN_FLAGS = frozenset("ef")

# grep long options that take a value, and the subset that supplies the pattern.
_GREP_LONG_VALUE_OPTIONS = frozenset(
    {
        "--regexp",
        "--file",
        "--max-count",
        "--after-context",
        "--before-context",
        "--context",
        "--devices",
        "--directories",
        "--include",
        "--exclude",
        "--exclude-from",
        "--exclude-dir",
        "--label",
        "--binary-files",
    }
)
_GREP_LONG_PATTERN_OPTIONS = frozenset({"--regexp", "--file"})


def _format_filepaths(paths: list[str]) -> str:
    """Render *paths* one per line inside a ``<filepaths>`` element."""
    if not paths:
        return "<filepaths>\n</filepaths>"
    return "<filepaths>\n" + "\n".join(paths) + "\n</filepaths>"


def _reader_file_operands(base_cmd: str, args: list[str]) -> list[str]:
    """Return the non-option arguments of a cat/head/tail-style command."""
    value_options = _READER_VALUE_OPTIONS.get(base_cmd, frozenset())
    operands: list[str] = []
    skip_next = False
    options_done = False
    for arg in args:
        if options_done:
            operands.append(arg)
        elif skip_next:
            # This token is the value of the preceding option, not a path.
            skip_next = False
        elif arg == "--":
            # Everything after "--" is an operand, even if it starts with "-".
            options_done = True
        elif arg.startswith("-"):
            skip_next = arg in value_options
        else:
            operands.append(arg)
    return operands


def _grep_file_operands(args: list[str]) -> list[str]:
    """Return the file operands of a grep invocation, excluding the pattern."""
    operands: list[str] = []
    pattern_from_option = False
    skip_next = False
    options_done = False
    for arg in args:
        if options_done:
            operands.append(arg)
            continue
        if skip_next:
            skip_next = False
            continue
        if arg == "--":
            options_done = True
            continue
        if not arg.startswith("-"):
            operands.append(arg)
            continue
        if arg.startswith("--"):
            # Long option: the value is either attached with "=" or is the
            # next token.
            name, has_value, _ = arg.partition("=")
            if name in _GREP_LONG_PATTERN_OPTIONS:
                pattern_from_option = True
            skip_next = name in _GREP_LONG_VALUE_OPTIONS and not has_value
            continue
        # Short option cluster such as "-rn", "-A5" or "-ie". The first flag
        # that takes a value ends the cluster: the rest of the token is its
        # value, or the next token is when the flag is the last character.
        for pos, flag in enumerate(arg[1:], start=1):
            if flag in _GREP_SHORT_VALUE_FLAGS:
                if flag in _GREP_SHORT_PATTERN_FLAGS:
                    pattern_from_option = True
                skip_next = pos == len(arg) - 1
                break
    # Without -e/-f (or their long forms) the first operand is the pattern.
    return operands if pattern_from_option else operands[1:]


def extract_filepaths_from_command(command: str, output: str) -> str:
    """Extract the file paths whose contents a command reads.

    The command is tokenised with ``shlex.split(..., posix=False)`` and any
    leading ``NAME=value`` environment assignments are ignored. The program
    name is compared case-insensitively after removing any directory part
    (``/`` or ``\\`` separated).

    * Listing/navigation commands (ls, dir, find, tree, pwd, cd, mkdir,
      rmdir, rm) yield no paths.
    * Reading commands (cat, head, tail, less, more, bat, type) yield their
      non-option arguments. For head and tail, the value following options
      such as ``-n`` or ``-c`` is not treated as a path.
    * grep yields its file operands. The first operand is the pattern unless
      a pattern was supplied through ``-e``/``-f``/``--regexp``/``--file``.
    * Any other command yields no paths.

    Args:
        command: The raw shell command line.
        output: Not inspected by this function; kept as part of the existing
            call signature.

    Returns:
        A string of the form ``"<filepaths>\\n...\\n</filepaths>"`` with one
        path per line, or ``"<filepaths>\\n</filepaths>"`` when there is
        nothing to report, including when the command cannot be tokenised.
    """
    listing_commands = {
        "ls",
        "dir",
        "find",
        "tree",
        "pwd",
        "cd",
        "mkdir",
        "rmdir",
        "rm",
    }
    reading_commands = {"cat", "head", "tail", "less", "more", "bat", "type"}

    try:
        parts = shlex.split(command, posix=False)
    except ValueError:
        # Tokenisation failed (e.g. unbalanced quotes): report no paths.
        return _format_filepaths([])

    if not parts:
        return _format_filepaths([])

    cmd_parts = _strip_env_assignments(parts)
    if not cmd_parts:
        return _format_filepaths([])

    # Reduce "/usr/bin/cat" or "C:\\tools\\CAT" to the bare lower-case name.
    base_cmd = cmd_parts[0].split("/")[-1].split("\\")[-1].lower()
    args = cmd_parts[1:]

    if base_cmd in listing_commands:
        return _format_filepaths([])
    if base_cmd in reading_commands:
        return _format_filepaths(_reader_file_operands(base_cmd, args))
    if base_cmd == "grep":
        return _format_filepaths(_grep_file_operands(args))
    return _format_filepaths([])
|