Upload 15 files
Browse files- app/solver.py +57 -48
- app/utils.py +24 -6
app/solver.py
CHANGED
|
@@ -518,13 +518,28 @@ class QuizSolver:
|
|
| 518 |
|
| 519 |
# 1. Command string extraction (e.g., "uv http get ...")
|
| 520 |
if 'command string' in question_lower or 'craft the command' in question_lower:
|
| 521 |
-
#
|
| 522 |
-
# Pattern: "uv http get ..."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
command_patterns = [
|
| 524 |
-
r'(uv\s+http\s+get\s+[^\
|
|
|
|
| 525 |
r'(curl\s+[^\n<>"]+)',
|
| 526 |
r'(wget\s+[^\n<>"]+)',
|
| 527 |
-
r'(http\s+get\s+[^\n<>"]+)',
|
| 528 |
]
|
| 529 |
for pattern in command_patterns:
|
| 530 |
match = re.search(pattern, combined, re.IGNORECASE)
|
|
@@ -534,29 +549,15 @@ class QuizSolver:
|
|
| 534 |
command = ' '.join(command.split())
|
| 535 |
# Stop at certain delimiters that indicate end of command
|
| 536 |
# Remove anything after common sentence endings that aren't part of command
|
| 537 |
-
command = re.sub(r'\s+(?:Submit|Do not|Note|Remember|Important).*$', '', command, flags=re.IGNORECASE)
|
| 538 |
# Substitute <your email> or <email> with actual email if provided
|
| 539 |
if email:
|
| 540 |
command = command.replace('<your email>', email)
|
| 541 |
command = command.replace('<email>', email)
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
# Also look for commands in error responses or instructions
|
| 547 |
-
# Pattern: "Submit the command string: ..."
|
| 548 |
-
submit_command_pattern = r'[Ss]ubmit\s+the\s+command\s+string[:\s]+(uv\s+http\s+get\s+[^\n<>"]+(?:\s+-H\s+"[^"]+")?)'
|
| 549 |
-
match = re.search(submit_command_pattern, combined, re.IGNORECASE)
|
| 550 |
-
if match:
|
| 551 |
-
command = match.group(1).strip()
|
| 552 |
-
command = ' '.join(command.split())
|
| 553 |
-
# Substitute <your email> or <email> with actual email if provided
|
| 554 |
-
if email:
|
| 555 |
-
command = command.replace('<your email>', email)
|
| 556 |
-
command = command.replace('<email>', email)
|
| 557 |
-
command = command.replace('your email', email)
|
| 558 |
-
logger.info(f"Extracted command from instruction: {command[:100]}...")
|
| 559 |
-
return command
|
| 560 |
|
| 561 |
# 2. Exact path extraction (e.g., "/project2/data-preparation.md")
|
| 562 |
if 'exact' in question_lower and ('path' in question_lower or 'string' in question_lower or 'link' in question_lower):
|
|
@@ -595,52 +596,60 @@ class QuizSolver:
|
|
| 595 |
|
| 596 |
# 3. Git commands extraction (e.g., "git add ..." and "git commit ...")
|
| 597 |
if 'git' in question_lower and ('command' in question_lower or 'stage' in question_lower or 'commit' in question_lower):
|
| 598 |
-
# Look for git commands
|
| 599 |
git_commands = []
|
| 600 |
|
| 601 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 602 |
git_add_patterns = [
|
| 603 |
-
r'git\s+add\s+
|
| 604 |
-
r'(git\s+add\s+[^\n<>"]+)', #
|
| 605 |
]
|
| 606 |
for pattern in git_add_patterns:
|
| 607 |
git_add_match = re.search(pattern, combined, re.IGNORECASE)
|
| 608 |
if git_add_match:
|
| 609 |
cmd = git_add_match.group(1).strip()
|
| 610 |
-
# If it's just the file, prepend "git add "
|
| 611 |
-
if not cmd.startswith('git'):
|
| 612 |
-
cmd = f"git add {cmd}"
|
| 613 |
if cmd not in git_commands:
|
| 614 |
git_commands.append(cmd)
|
| 615 |
break
|
| 616 |
|
| 617 |
-
# Pattern for "git commit
|
| 618 |
git_commit_patterns = [
|
| 619 |
-
r'git\s+commit\s+-m\s+"
|
| 620 |
-
r'git\s+commit\s+-m\s+
|
| 621 |
-
r'(git\s+commit\s+[^\n<>"]+)', # Full command
|
| 622 |
]
|
| 623 |
for pattern in git_commit_patterns:
|
| 624 |
git_commit_match = re.search(pattern, combined, re.IGNORECASE)
|
| 625 |
if git_commit_match:
|
| 626 |
cmd = git_commit_match.group(1).strip()
|
| 627 |
-
# If
|
| 628 |
-
if not cmd
|
| 629 |
-
|
| 630 |
-
if
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
cmd = f'git commit -m "{cmd}"'
|
| 634 |
if cmd not in git_commands:
|
| 635 |
git_commands.append(cmd)
|
| 636 |
break
|
| 637 |
|
| 638 |
-
# Also check error responses like "Need git add ... then git commit ..."
|
| 639 |
-
need_pattern = r'[Nn]eed\s+(git\s+add\s+[^\s]+)\s+then\s+(git\s+commit\s+[^\n<>"]+)'
|
| 640 |
-
need_match = re.search(need_pattern, combined, re.IGNORECASE)
|
| 641 |
-
if need_match:
|
| 642 |
-
git_commands = [need_match.group(1).strip(), need_match.group(2).strip()]
|
| 643 |
-
|
| 644 |
# If we found git commands, return them
|
| 645 |
if git_commands:
|
| 646 |
# If question asks for "two commands", return them separated by newline
|
|
@@ -649,7 +658,7 @@ class QuizSolver:
|
|
| 649 |
logger.info(f"Extracted git commands: {result}")
|
| 650 |
return result
|
| 651 |
# Otherwise return the first one
|
| 652 |
-
|
| 653 |
logger.info(f"Extracted git command: {git_commands[0]}")
|
| 654 |
return git_commands[0]
|
| 655 |
|
|
|
|
| 518 |
|
| 519 |
# 1. Command string extraction (e.g., "uv http get ...")
|
| 520 |
if 'command string' in question_lower or 'craft the command' in question_lower:
|
| 521 |
+
# First, check error responses which often contain the exact command format
|
| 522 |
+
# Pattern: "Submit the command string: uv http get ..."
|
| 523 |
+
submit_command_pattern = r'[Ss]ubmit\s+the\s+command\s+string[:\s]+(uv\s+http\s+get\s+[^\n<>"]+(?:\s+-H\s+"[^"]+")?)'
|
| 524 |
+
match = re.search(submit_command_pattern, combined, re.IGNORECASE)
|
| 525 |
+
if match:
|
| 526 |
+
command = match.group(1).strip()
|
| 527 |
+
command = ' '.join(command.split())
|
| 528 |
+
# Substitute <your email> or <email> with actual email if provided
|
| 529 |
+
if email:
|
| 530 |
+
command = command.replace('<your email>', email)
|
| 531 |
+
command = command.replace('<email>', email)
|
| 532 |
+
logger.info(f"Extracted command from instruction: {command[:100]}...")
|
| 533 |
+
return command
|
| 534 |
+
|
| 535 |
+
# Look for command patterns in the page
|
| 536 |
+
# Pattern: "uv http get https://..." - need to capture full URL and optional header
|
| 537 |
+
# More specific pattern that captures the full command
|
| 538 |
command_patterns = [
|
| 539 |
+
r'(uv\s+http\s+get\s+https?://[^\s<>"]+(?:\?[^\s<>"]+)?(?:\s+-H\s+"[^"]+")?)', # Full URL with query params and header
|
| 540 |
+
r'(uv\s+http\s+get\s+https?://[^\s<>"]+)', # Just URL
|
| 541 |
r'(curl\s+[^\n<>"]+)',
|
| 542 |
r'(wget\s+[^\n<>"]+)',
|
|
|
|
| 543 |
]
|
| 544 |
for pattern in command_patterns:
|
| 545 |
match = re.search(pattern, combined, re.IGNORECASE)
|
|
|
|
| 549 |
command = ' '.join(command.split())
|
| 550 |
# Stop at certain delimiters that indicate end of command
|
| 551 |
# Remove anything after common sentence endings that aren't part of command
|
| 552 |
+
command = re.sub(r'\s+(?:Submit|Do not|Note|Remember|Important|\.\s+[A-Z]).*$', '', command, flags=re.IGNORECASE)
|
| 553 |
# Substitute <your email> or <email> with actual email if provided
|
| 554 |
if email:
|
| 555 |
command = command.replace('<your email>', email)
|
| 556 |
command = command.replace('<email>', email)
|
| 557 |
+
# Ensure we have a complete command (should have URL)
|
| 558 |
+
if 'http' in command.lower() and len(command) > 20: # Reasonable minimum length
|
| 559 |
+
logger.info(f"Extracted command string: {command[:100]}...")
|
| 560 |
+
return command
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 561 |
|
| 562 |
# 2. Exact path extraction (e.g., "/project2/data-preparation.md")
|
| 563 |
if 'exact' in question_lower and ('path' in question_lower or 'string' in question_lower or 'link' in question_lower):
|
|
|
|
| 596 |
|
| 597 |
# 3. Git commands extraction (e.g., "git add ..." and "git commit ...")
|
| 598 |
if 'git' in question_lower and ('command' in question_lower or 'stage' in question_lower or 'commit' in question_lower):
|
|
|
|
| 599 |
git_commands = []
|
| 600 |
|
| 601 |
+
# First, check error responses which often contain the exact format
|
| 602 |
+
# Pattern: "Need git add ... then git commit ..."
|
| 603 |
+
need_pattern = r'[Nn]eed\s+(git\s+add\s+[^\s]+)\s+then\s+(git\s+commit\s+[^\n<>"]+)'
|
| 604 |
+
need_match = re.search(need_pattern, combined, re.IGNORECASE)
|
| 605 |
+
if need_match:
|
| 606 |
+
cmd1 = need_match.group(1).strip()
|
| 607 |
+
cmd2 = need_match.group(2).strip()
|
| 608 |
+
# Ensure cmd2 has the message in quotes if needed
|
| 609 |
+
if '-m' in cmd2 and '"' not in cmd2 and "'" not in cmd2:
|
| 610 |
+
# Extract message and add quotes
|
| 611 |
+
msg_match = re.search(r'-m\s+([^\s]+)', cmd2)
|
| 612 |
+
if msg_match:
|
| 613 |
+
msg = msg_match.group(1)
|
| 614 |
+
cmd2 = cmd2.replace(msg, f'"{msg}"')
|
| 615 |
+
git_commands = [cmd1, cmd2]
|
| 616 |
+
result = '\n'.join(git_commands)
|
| 617 |
+
logger.info(f"Extracted git commands from error response: {result}")
|
| 618 |
+
return result
|
| 619 |
+
|
| 620 |
+
# Look for git commands in the page
|
| 621 |
+
# Pattern for "git add env.sample"
|
| 622 |
git_add_patterns = [
|
| 623 |
+
r'(git\s+add\s+env\.sample)', # Specific file
|
| 624 |
+
r'(git\s+add\s+[^\s\n<>"]+)', # General
|
| 625 |
]
|
| 626 |
for pattern in git_add_patterns:
|
| 627 |
git_add_match = re.search(pattern, combined, re.IGNORECASE)
|
| 628 |
if git_add_match:
|
| 629 |
cmd = git_add_match.group(1).strip()
|
|
|
|
|
|
|
|
|
|
| 630 |
if cmd not in git_commands:
|
| 631 |
git_commands.append(cmd)
|
| 632 |
break
|
| 633 |
|
| 634 |
+
# Pattern for "git commit -m "chore: keep env sample""
|
| 635 |
git_commit_patterns = [
|
| 636 |
+
r'(git\s+commit\s+-m\s+"[^"]+")', # With quotes
|
| 637 |
+
r'(git\s+commit\s+-m\s+[^\s\n<>"]+)', # Without quotes (will add them)
|
|
|
|
| 638 |
]
|
| 639 |
for pattern in git_commit_patterns:
|
| 640 |
git_commit_match = re.search(pattern, combined, re.IGNORECASE)
|
| 641 |
if git_commit_match:
|
| 642 |
cmd = git_commit_match.group(1).strip()
|
| 643 |
+
# If message doesn't have quotes, add them
|
| 644 |
+
if '-m' in cmd and '"' not in cmd and "'" not in cmd:
|
| 645 |
+
msg_match = re.search(r'-m\s+([^\s]+)', cmd)
|
| 646 |
+
if msg_match:
|
| 647 |
+
msg = msg_match.group(1)
|
| 648 |
+
cmd = cmd.replace(msg, f'"{msg}"')
|
|
|
|
| 649 |
if cmd not in git_commands:
|
| 650 |
git_commands.append(cmd)
|
| 651 |
break
|
| 652 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 653 |
# If we found git commands, return them
|
| 654 |
if git_commands:
|
| 655 |
# If question asks for "two commands", return them separated by newline
|
|
|
|
| 658 |
logger.info(f"Extracted git commands: {result}")
|
| 659 |
return result
|
| 660 |
# Otherwise return the first one
|
| 661 |
+
elif git_commands:
|
| 662 |
logger.info(f"Extracted git command: {git_commands[0]}")
|
| 663 |
return git_commands[0]
|
| 664 |
|
app/utils.py
CHANGED
|
@@ -74,19 +74,37 @@ def extract_submit_url(text: str, base_url: str) -> Optional[str]:
|
|
| 74 |
continue
|
| 75 |
|
| 76 |
# Try to find relative submit links (e.g. href="/submit")
|
|
|
|
| 77 |
rel_patterns = [
|
| 78 |
-
r'href=["\\\'](/[^"\\\']*submit[^"\\\']*)["\\\']',
|
| 79 |
-
r'(/[^\\s"<>\']*submit[^\\s"<>\']*)',
|
| 80 |
r'POST\s+to\s+JSON\s+to\s+(/[^\s<>"\'\)]+)', # "POST to JSON to /submit"
|
| 81 |
r'[Pp]ost\s+(?:to|at):\s+(/[^\s<>"\'\)]+)', # "POST to: /submit"
|
| 82 |
]
|
| 83 |
for pattern in rel_patterns:
|
| 84 |
matches = re.findall(pattern, text, re.IGNORECASE)
|
| 85 |
if matches:
|
| 86 |
-
candidate = matches[0].strip()
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
# Try to find submit URL in the base domain with /submit path
|
| 92 |
if base_url:
|
|
|
|
| 74 |
continue
|
| 75 |
|
| 76 |
# Try to find relative submit links (e.g. href="/submit")
|
| 77 |
+
# Be more strict - only match actual submit endpoints, not paths that happen to contain "submit" in text
|
| 78 |
rel_patterns = [
|
| 79 |
+
r'href=["\\\'](/[^"\\\']*submit[^"\\\']*)["\\\']', # href="/submit" or href="/api/submit"
|
|
|
|
| 80 |
r'POST\s+to\s+JSON\s+to\s+(/[^\s<>"\'\)]+)', # "POST to JSON to /submit"
|
| 81 |
r'[Pp]ost\s+(?:to|at):\s+(/[^\s<>"\'\)]+)', # "POST to: /submit"
|
| 82 |
]
|
| 83 |
for pattern in rel_patterns:
|
| 84 |
matches = re.findall(pattern, text, re.IGNORECASE)
|
| 85 |
if matches:
|
| 86 |
+
candidate = matches[0].strip()
|
| 87 |
+
# Clean up - stop at first space, parenthesis, or other non-URL character
|
| 88 |
+
# This prevents matching text like "/path (description).Submit"
|
| 89 |
+
candidate = re.sub(r'[\s\(\)].*$', '', candidate) # Remove everything after space or paren
|
| 90 |
+
candidate = candidate.rstrip('.,;:!?)}]{["\'')
|
| 91 |
+
# Validate it's actually a submit endpoint (contains "submit" in the path)
|
| 92 |
+
# AND it doesn't contain file extensions that indicate it's a document path
|
| 93 |
+
if 'submit' in candidate.lower() and not any(ext in candidate.lower() for ext in ['.md', '.txt', '.pdf', '.html']):
|
| 94 |
+
try:
|
| 95 |
+
joined = urljoin(base_url, candidate)
|
| 96 |
+
# Final validation - ensure it's a valid URL
|
| 97 |
+
parsed = urlparse(joined)
|
| 98 |
+
if parsed.scheme and parsed.netloc:
|
| 99 |
+
logger.info(f"Found relative submit URL: {joined}")
|
| 100 |
+
return joined
|
| 101 |
+
except Exception as e:
|
| 102 |
+
logger.warning(f"Invalid relative URL candidate: {candidate}, error: {e}")
|
| 103 |
+
continue
|
| 104 |
+
|
| 105 |
+
# Don't match generic paths that happen to contain "submit" in surrounding text
|
| 106 |
+
# This was causing issues where paths like "/project2/data-preparation.md (local copy provided).Submit that exact"
|
| 107 |
+
# were being matched incorrectly
|
| 108 |
|
| 109 |
# Try to find submit URL in the base domain with /submit path
|
| 110 |
if base_url:
|