100XZX001's picture
Upload 19 files
4036b6f verified
# tools.py – Real vector retrieval for query_docs, linter, and test runner
import os
import subprocess
import sys
import tempfile
from dataclasses import dataclass

import chromadb
from sentence_transformers import SentenceTransformer
@dataclass
class ToolBox:
_embedder = None
_client = None
_collection = None
@classmethod
def _get_embedder(cls):
if cls._embedder is None:
cls._embedder = SentenceTransformer('all-MiniLM-L6-v2')
return cls._embedder
@classmethod
def _get_collection(cls):
if cls._collection is None:
cls._client = chromadb.Client()
cls._collection = cls._client.create_collection("docs")
# Pre‑load real documentation snippets (can be extended)
docs = [
"KeyError occurs when a dictionary key is missing. Use dict.get() or check 'if key in dict'.",
"pylint error C0304: missing final newline. Add a newline at the end of file.",
"Deadlock happens when two threads acquire locks in opposite order. Always acquire locks in the same order.",
"Division by zero: check if list is empty before calculating average, or use try/except.",
"Threading.Lock: use 'with lock:' to automatically acquire and release.",
"Off‑by‑one errors: adjust loop ranges, e.g., range(1, len(arr)-1).",
]
embedder = cls._get_embedder()
embeddings = embedder.encode(docs).tolist()
for i, doc in enumerate(docs):
cls._collection.add(ids=[str(i)], documents=[doc], embeddings=[embeddings[i]])
return cls._collection
@staticmethod
def run_linter(code: str) -> str:
with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8') as f:
f.write(code)
f.flush()
tmp_path = f.name
try:
result = subprocess.run(
['pylint', tmp_path, '--exit-zero', '--output-format=text'],
capture_output=True,
text=True,
timeout=10,
encoding='utf-8'
)
output = result.stdout
if "Your code has been rated" in output:
output = output.split("Your code has been rated")[0]
output = output.strip()
if not output:
return "No linting issues found."
return output[:500]
except FileNotFoundError:
return "Linter (pylint) not installed."
except subprocess.TimeoutExpired:
return "Linter timed out."
except Exception as e:
return f"Linter error: {str(e)}"
finally:
try:
os.unlink(tmp_path)
except:
pass
@staticmethod
def run_tests(test_script: str) -> str:
with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8') as f:
f.write(test_script)
f.flush()
tmp_path = f.name
try:
result = subprocess.run(
['python', tmp_path],
capture_output=True,
text=True,
timeout=10,
encoding='utf-8'
)
output = result.stdout + result.stderr
return output.strip() or "Test executed successfully (no output)."
except subprocess.TimeoutExpired:
return "Test execution timed out."
except Exception as e:
return f"Test runner error: {str(e)}"
finally:
try:
os.unlink(tmp_path)
except:
pass
@classmethod
def query_docs(cls, topic: str) -> str:
"""Retrieve top 3 relevant docs. Forces agent to reason across multiple hints."""
try:
embedder = cls._get_embedder()
collection = cls._get_collection()
query_emb = embedder.encode([topic]).tolist()
# Get top 3 results (not just 1)
results = collection.query(query_embeddings=query_emb, n_results=3)
if results['documents'] and results['documents'][0]:
# Return concatenated snippets, labelled for clarity
snippets = []
for i, doc in enumerate(results['documents'][0]):
snippets.append(f"[{i+1}] {doc}")
return "Relevant documentation:\n" + "\n".join(snippets)
return "No relevant documentation found."
except Exception:
# Fallback to keyword matching
topic_lower = topic.lower()
fallback = {
"null check": "To avoid KeyError, use 'if key in dict:' before accessing.",
"keyerror": "Catch KeyError with try/except or use dict.get().",
"deadlock": "Always acquire locks in the same order to avoid deadlock.",
}
for key, value in fallback.items():
if key in topic_lower:
return value
return "No relevant documentation found. Try being more specific."