File size: 10,503 Bytes
3ec78dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 |
import os
import tempfile
import shutil
import zipfile
import tarfile
from pathlib import Path
from typing import Dict, List, Optional, Tuple
def _extract_zip(path: Path) -> str:
    """Extract a zip archive into a fresh temporary directory.

    Args:
        path: Location of the ``.zip`` archive.

    Returns:
        Path of the newly created temporary directory holding the contents.

    Raises:
        Whatever ``zipfile`` raises for a missing or invalid archive; the
        temporary directory is removed before the error propagates.
    """
    temp_dir = tempfile.mkdtemp()
    try:
        with zipfile.ZipFile(path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)
    except BaseException:
        # Do not leave an orphaned temp directory behind on failure.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise
    return temp_dir


def _extract_tgz(path: Path) -> str:
    """Extract a gzip-compressed tar archive into a fresh temporary directory.

    Args:
        path: Location of the ``.tgz`` / ``.tar.gz`` archive.

    Returns:
        Path of the newly created temporary directory holding the contents.

    Raises:
        Whatever ``tarfile`` raises for a missing or invalid archive; the
        temporary directory is removed before the error propagates.
    """
    temp_dir = tempfile.mkdtemp()
    try:
        with tarfile.open(path, 'r:gz') as tar_ref:
            if hasattr(tarfile, 'data_filter'):
                # Extraction filters are available: refuse members that
                # would escape temp_dir (absolute paths, '..', bad links).
                tar_ref.extractall(temp_dir, filter='data')
            else:
                tar_ref.extractall(temp_dir)
    except BaseException:
        # Do not leave an orphaned temp directory behind on failure.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise
    return temp_dir


def prepare_input_path(path: str) -> str:
    """Turn a directory, archive, or single file into a directory path.

    Directories are returned as-is. ``.zip``, ``.tgz`` and ``.tar.gz``
    archives are extracted into a new temporary directory. Any other
    existing file is copied into a new temporary directory.

    Args:
        path: Path to a directory, an archive, or a single file.

    Returns:
        Path of a directory containing the input. For archives and single
        files this is a temporary directory the caller is responsible for
        removing.

    Raises:
        ValueError: If ``path`` is neither a directory, a recognised
            archive name, nor an existing file.
    """
    path_obj = Path(path)
    if path_obj.is_dir():
        return str(path_obj)

    if path_obj.suffix == '.zip':
        return _extract_zip(path_obj)

    # Path.suffix only holds the last extension ('.gz' for 'x.tar.gz'),
    # so the double extension has to be checked through .suffixes.
    if path_obj.suffix == '.tgz' or path_obj.suffixes[-2:] == ['.tar', '.gz']:
        return _extract_tgz(path_obj)

    if path_obj.is_file():
        # Copy single file to a temporary directory
        temp_dir = tempfile.mkdtemp()
        try:
            shutil.copy(path_obj, temp_dir)
        except BaseException:
            shutil.rmtree(temp_dir, ignore_errors=True)
            raise
        return temp_dir

    raise ValueError(f"Unsupported path type or extension: {path}")
def file_path_to_module_path(file_path: str) -> str:
    """
    Turn a file path into a dotted module-style path.

    Backslashes and the platform separator are first unified to '/', the
    extension of the final path component is dropped, and every '/' then
    becomes a '.'. A leading '/' therefore yields a leading '.'.

    Examples:
        path/to/repo/python_script.py -> path.to.repo.python_script
        src/utils/helper.py -> src.utils.helper
        module.py -> module

    Args:
        file_path: File path string

    Returns:
        The dotted module path
    """
    # Unify separators so the rest of the function only deals with '/'
    forward_slashed = file_path.replace('\\', '/')
    forward_slashed = forward_slashed.replace(os.sep, '/')

    # splitext only strips an extension from the last path component
    stem, _extension = os.path.splitext(forward_slashed)

    return '.'.join(stem.split('/'))
def _is_temp_component(component: str) -> bool:
    """Return True if a module-path component looks like temp-directory noise.

    ``component`` is one piece of a module path already split on '.', so it
    never contains a dot itself.
    """
    if not component:
        # Empty pieces come from leading or doubled separators.
        return True
    # Names such as 'tmptqky4yk4' produced by tempfile.mkdtemp()
    if component.startswith('tmp') and len(component) > 3:
        return True
    # UUID-like pieces: long, alphanumeric (ignoring '_' and '-'),
    # mostly lowercase and containing more than two digits.
    if len(component) > 8 and component.replace('_', '').replace('-', '').isalnum():
        lowercase_count = sum(c.islower() for c in component)
        digit_count = sum(c.isdigit() for c in component)
        return lowercase_count > len(component) / 2 and digit_count > 2
    return False


def generate_entity_aliases(entity_name: str, file_path: str) -> List[str]:
    """
    Generate all possible aliases for an entity based on its name and file path.

    Aliases are emitted in this order, without duplicates:
      1. the entity name itself;
      2. for '::'-qualified names, the part after the last '::';
      3. '.<suffix>.<entity>' for every suffix of the cleaned module path,
         shortest first;
      4. '<clean path>.<entity>' (no leading dot) for the full cleaned path;
      5. '<module path>.<entity>' using the unfiltered module path.

    The cleaned module path drops empty components, a '/tmp' directory at
    the filesystem root, and components that look like generated temp or
    UUID-style directory names.

    For example, 'path/to/repo/python_script.py' defining 'Class_1' gives:
      - Class_1
      - .python_script.Class_1
      - .repo.python_script.Class_1
      - .to.repo.python_script.Class_1
      - .path.to.repo.python_script.Class_1
      - path.to.repo.python_script.Class_1

    For a C++ entity 'math::Calculator' defined in 'math/calculator.h':
      - math::Calculator
      - Calculator
      - .calculator.math::Calculator
      - .math.calculator.math::Calculator
      - math.calculator.math::Calculator

    For 'pos' defined in '/tmp/tmptqky4yk4/pyinstaller/run_astropy_tests.py':
      - pos
      - .run_astropy_tests.pos
      - .pyinstaller.run_astropy_tests.pos
      - pyinstaller.run_astropy_tests.pos
      - .tmp.tmptqky4yk4.pyinstaller.run_astropy_tests.pos

    Args:
        entity_name: The name of the entity (e.g., 'Class_1', 'my_function', 'math::Calculator')
        file_path: The file path where the entity is defined

    Returns:
        List of alias strings
    """
    aliases: List[str] = []
    seen = set()

    def add(alias: str) -> None:
        # Keep first-seen order while rejecting duplicates in O(1).
        if alias not in seen:
            seen.add(alias)
            aliases.append(alias)

    # Always include the simple entity name
    add(entity_name)

    # For C++/C-style namespaced entities, also expose the unqualified name
    if '::' in entity_name:
        add(entity_name.split('::')[-1])

    module_path = file_path_to_module_path(file_path)
    components = module_path.split('.')

    # An absolute '/tmp/...' path becomes ['', 'tmp', ...]. The bare 'tmp'
    # is too short for the generated-name heuristic, so it is dropped here.
    under_root_tmp = components[:2] == ['', 'tmp']
    clean_components = [
        component
        for index, component in enumerate(components)
        if not (under_root_tmp and index == 1)
        and not _is_temp_component(component)
    ]

    # Progressive aliases: the last 1, 2, ... meaningful components
    for depth in range(1, len(clean_components) + 1):
        partial_path = '.'.join(clean_components[-depth:])
        add(f".{partial_path}.{entity_name}")

    # The full clean path is also offered without the leading dot
    if clean_components:
        add(f"{'.'.join(clean_components)}.{entity_name}")

    # Always finish with the unfiltered path (temp components included)
    add(f"{module_path}.{entity_name}")

    return aliases
def normalize_include_path(include_path: str) -> str:
    """
    Convert the target of an #include directive into a module-like path.

    Surrounding angle brackets and double quotes are stripped, then the
    remainder is converted by file_path_to_module_path.

    Examples:
        <vector> -> vector
        <iostream> -> iostream
        "myheader.h" -> myheader
        "utils/helper.h" -> utils.helper
        <boost/algorithm/string.hpp> -> boost.algorithm.string

    Args:
        include_path: The include path from #include directive

    Returns:
        Normalized module-like path
    """
    # Drop the <...> or "..." delimiters, then reuse the generic converter
    bare_path = include_path.strip('<>"')
    return file_path_to_module_path(bare_path)
def build_entity_alias_map(entities: Dict[str, Dict]) -> Dict[str, str]:
    """
    Build a lookup table from every known alias to its canonical entity name.

    Entities are processed in iteration order. A canonical name always maps
    to itself, even if an earlier entity listed it as an alias. For any
    other alias, the first entity that lists it wins. Empty aliases are
    skipped.

    Args:
        entities: Dictionary of entity info keyed by canonical entity name;
            each info dict may carry an 'aliases' list

    Returns:
        Dictionary mapping alias -> canonical entity name
    """
    lookup = {}

    for canonical, details in entities.items():
        # The canonical name unconditionally resolves to itself
        lookup[canonical] = canonical

        # setdefault keeps whichever entity claimed the alias first
        for candidate in details.get('aliases', []):
            if candidate:
                lookup.setdefault(candidate, canonical)

    return lookup
def resolve_entity_call(called_name: str, alias_map: Dict[str, str],
                        imports: List[str] = None) -> Optional[str]:
    """
    Map a called entity name to its canonical definition via the alias map.

    Resolution is attempted in three stages, returning at the first hit:
      1. Exact lookup of the called name in the alias map.
      2. For each import path, in order: '<import>.<name>' and then
         '<import>::<name>'.
      3. Fuzzy match: every alias whose simple name (see
         extract_simple_name) equals the simple name of the call is a
         candidate. The shortest canonical name wins; ties go to the
         earliest entry in the alias map.

    Examples:
        - Direct call: 'MyClass' -> 'MyClass'
        - Qualified call: 'module.MyClass' -> 'MyClass' (if alias exists)
        - Imported call: 'helper' -> 'utils.helper' (if imported)
        - Simple name to qualified: 'Calculator' -> 'utils::Calculator'

    Args:
        called_name: The name of the called entity
        alias_map: Mapping from aliases to canonical entity names
        imports: List of import paths (optional, for context)

    Returns:
        Canonical entity name if found, None otherwise
    """
    # Empty or whitespace-only names cannot be resolved
    if not (called_name and called_name.strip()):
        return None

    # Stage 1: exact alias
    try:
        return alias_map[called_name]
    except KeyError:
        pass

    # Stage 2: prefix the name with each import, Python style then C++ style
    for import_path in imports or ():
        for separator in ('.', '::'):
            prefixed = f"{import_path}{separator}{called_name}"
            if prefixed in alias_map:
                return alias_map[prefixed]

    # Stage 3: compare simple names only
    wanted = extract_simple_name(called_name)
    matches = [
        canonical
        for alias, canonical in alias_map.items()
        if extract_simple_name(alias) == wanted
    ]

    if not matches:
        return None
    if len(matches) == 1:
        return matches[0]
    # Prefer the shortest canonical name: it is the most likely to be the
    # direct definition rather than a longer qualified variant
    return min(matches, key=len)
def extract_simple_name(qualified_name: str) -> str:
    """
    Return the trailing simple name of a qualified name.

    The C++-style '::' separator takes precedence: if it is present, the
    text after the last '::' split is returned as-is, even if it still
    contains dots. Otherwise the text after the last '.' is returned. A
    name with neither separator is returned unchanged.

    Examples:
        'namespace::MyClass' -> 'MyClass'
        'module.MyClass' -> 'MyClass'
        'MyClass' -> 'MyClass'

    Args:
        qualified_name: Fully or partially qualified name

    Returns:
        Simple name without namespace/module prefix
    """
    # Separators in priority order: C++ namespaces first, then modules
    for separator in ('::', '.'):
        if separator in qualified_name:
            return qualified_name.split(separator)[-1]
    return qualified_name
|