Spaces:
Sleeping
Sleeping
| import os | |
| import shutil | |
| from rag_pipeline import hybrid_search, smart_file_filter | |
| from langchain_community.vectorstores import Chroma | |
def search_code(query, vectorstore: Chroma, repo_path: str, k: int = 5):
    """
    Run a hybrid search over the repository and format the hits as text.

    The query is forwarded to ``hybrid_search`` together with the vector
    store, the repository path and ``k``. Each returned document
    contributes a header line carrying its 1-based rank and the
    ``file_path`` from its metadata ('Unknown' when absent), followed by
    the first 1500 characters of its content.

    Returns:
        A single newline-joined string with all formatted results; the
        empty string when the search yields nothing.
    """
    hits = hybrid_search(vectorstore, repo_path, query, k=k)

    sections = []
    for rank, hit in enumerate(hits, start=1):
        source = hit.metadata.get('file_path', 'Unknown')
        sections.extend((
            f"--- Result {rank} [{source}] ---",
            # Cap each chunk so one large hit cannot flood the context.
            hit.page_content[:1500],
            # Separator entry; joined with "\n" below it yields blank lines.
            "\n",
        ))
    return "\n".join(sections)
def open_file(path, repo_path):
    """
    Open and read the content of a file from the repository.

    Handles common path aliases like '@/', and drops a leading 'src/' when
    the repository has no 'src' directory. Failures are reported as
    "Error: ..." strings rather than raised, so the caller always gets text.

    Args:
        path: Repository-relative path as supplied by the caller. Leading
            slashes and any '@/' occurrences are removed before resolution.
        repo_path: Root directory of the repository on the local filesystem.

    Returns:
        The file's text decoded as UTF-8 (undecodable bytes are dropped),
        cut to 5000 characters plus a truncation marker when longer; or an
        error message when the path resolves outside the repository, does
        not exist, is a directory, or cannot be read.
    """
    max_chars = 5000

    # Clean common path aliases/prefixes to stay consistent with the local filesystem.
    clean_path = path.lstrip("/").replace("@/", "")
    if clean_path.startswith("src/") and not os.path.exists(os.path.join(repo_path, "src")):
        # The caller assumed a src/ layout that this repository does not have.
        clean_path = clean_path.replace("src/", "", 1)

    abs_repo = os.path.abspath(repo_path).replace("\\", "/")
    full_path = os.path.join(repo_path, clean_path).replace("\\", "/")
    abs_file = os.path.abspath(full_path).replace("\\", "/")

    # Containment must be checked on a path-component boundary: a bare
    # startswith(abs_repo) would accept siblings such as "<repo>-secret/...".
    repo_prefix = abs_repo.rstrip("/") + "/"
    if abs_file != abs_repo and not abs_file.startswith(repo_prefix):
        return f"Error: Path '{path}' is outside repo."

    if not os.path.exists(abs_file):
        return f"Error: File '{path}' not found (Original path: {path})."
    if os.path.isdir(abs_file):
        return f"Error: '{path}' is a directory. Use list_files or search instead."

    try:
        with open(abs_file, 'r', encoding='utf-8', errors='ignore') as f:
            # One character past the limit is enough to detect truncation
            # without loading an arbitrarily large file into memory.
            content = f.read(max_chars + 1)
    except Exception as e:
        # Deliberately broad: this function reports every failure as text.
        return f"Error reading file '{path}': {e}"

    # Limit returned size to avoid blowing up context.
    if len(content) > max_chars:
        return content[:max_chars] + "\n\n... (file truncated) ..."
    return content
def list_files(repo_path, path="."):
    """
    List the immediate children of a directory within the repository.

    Applies the same path clean-up as file opening: leading slashes and
    '@/' are removed, and a leading 'src/' is dropped when the repository
    has no 'src' directory. Failures are reported as "Error: ..." strings
    rather than raised.

    Args:
        repo_path: Root directory of the repository on the local filesystem.
        path: Repository-relative directory to list; defaults to the root.

    Returns:
        A text listing headed by "Contents of '<path>':". Sub-directories
        come first (sorted, with a trailing slash, skipping dot-directories
        and common dependency/build folders), followed by the sorted file
        names accepted by ``smart_file_filter``. At most 30 entries are
        shown; a marker line reports how many were left out. Returns a
        message instead when nothing relevant is found, or when the path is
        outside the repository, missing, or not a directory.
    """
    max_entries = 30
    # Dot-directories (e.g. .git) are excluded separately by prefix.
    junk_dirs = {'node_modules', '__pycache__', 'vendor', 'dist', 'build'}

    clean_path = path.lstrip("/").replace("@/", "")
    if clean_path.startswith("src/") and not os.path.exists(os.path.join(repo_path, "src")):
        clean_path = clean_path.replace("src/", "", 1)

    abs_repo = os.path.abspath(repo_path).replace("\\", "/")
    target_dir = os.path.join(repo_path, clean_path).replace("\\", "/")
    abs_target = os.path.abspath(target_dir).replace("\\", "/")

    # Containment must be checked on a path-component boundary: a bare
    # startswith(abs_repo) would accept siblings such as "<repo>-secret".
    repo_prefix = abs_repo.rstrip("/") + "/"
    if abs_target != abs_repo and not abs_target.startswith(repo_prefix):
        return f"Error: Path '{path}' is outside repo."

    if not os.path.exists(abs_target):
        return f"Error: Directory '{path}' not found (Original path: {path})."
    if not os.path.isdir(abs_target):
        return f"Error: '{path}' is not a directory."

    try:
        # The first triple produced by os.walk describes the requested
        # directory itself, i.e. exactly its immediate children.
        _, dirs, filenames = next(os.walk(abs_target), (abs_target, [], []))
        items = [
            f"{d}/"
            for d in sorted(dirs)
            if not d.startswith('.') and d not in junk_dirs
        ]
        # Only the bare filename is passed to the filter for listing.
        items.extend(f for f in sorted(filenames) if smart_file_filter(f))
    except Exception as e:
        # Deliberately broad: this function reports every failure as text.
        return f"Error listing files in '{path}': {e}"

    if not items:
        return f"No relevant files or directories found in '{path}'."

    # Keep the summary brief, but say so when entries were left out.
    shown = items[:max_entries]
    omitted = len(items) - len(shown)
    if omitted:
        shown.append(f"... ({omitted} more entries not shown) ...")
    return f"Contents of '{path}':\n" + "\n".join(shown)