iitmbs24f commited on
Commit
23fc6bf
·
verified ·
1 Parent(s): b6a94a6

Upload 15 files

Browse files
Files changed (2) hide show
  1. app/solver.py +57 -48
  2. app/utils.py +24 -6
app/solver.py CHANGED
@@ -518,13 +518,28 @@ class QuizSolver:
518
 
519
  # 1. Command string extraction (e.g., "uv http get ...")
520
  if 'command string' in question_lower or 'craft the command' in question_lower:
521
- # Look for command patterns
522
- # Pattern: "uv http get ..." or similar command strings
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523
  command_patterns = [
524
- r'(uv\s+http\s+get\s+[^\n<>"]+(?:\s+-H\s+"[^"]+")?)', # uv http get with optional headers
 
525
  r'(curl\s+[^\n<>"]+)',
526
  r'(wget\s+[^\n<>"]+)',
527
- r'(http\s+get\s+[^\n<>"]+)',
528
  ]
529
  for pattern in command_patterns:
530
  match = re.search(pattern, combined, re.IGNORECASE)
@@ -534,29 +549,15 @@ class QuizSolver:
534
  command = ' '.join(command.split())
535
  # Stop at certain delimiters that indicate end of command
536
  # Remove anything after common sentence endings that aren't part of command
537
- command = re.sub(r'\s+(?:Submit|Do not|Note|Remember|Important).*$', '', command, flags=re.IGNORECASE)
538
  # Substitute <your email> or <email> with actual email if provided
539
  if email:
540
  command = command.replace('<your email>', email)
541
  command = command.replace('<email>', email)
542
- command = command.replace('your email', email)
543
- logger.info(f"Extracted command string: {command[:100]}...")
544
- return command
545
-
546
- # Also look for commands in error responses or instructions
547
- # Pattern: "Submit the command string: ..."
548
- submit_command_pattern = r'[Ss]ubmit\s+the\s+command\s+string[:\s]+(uv\s+http\s+get\s+[^\n<>"]+(?:\s+-H\s+"[^"]+")?)'
549
- match = re.search(submit_command_pattern, combined, re.IGNORECASE)
550
- if match:
551
- command = match.group(1).strip()
552
- command = ' '.join(command.split())
553
- # Substitute <your email> or <email> with actual email if provided
554
- if email:
555
- command = command.replace('<your email>', email)
556
- command = command.replace('<email>', email)
557
- command = command.replace('your email', email)
558
- logger.info(f"Extracted command from instruction: {command[:100]}...")
559
- return command
560
 
561
  # 2. Exact path extraction (e.g., "/project2/data-preparation.md")
562
  if 'exact' in question_lower and ('path' in question_lower or 'string' in question_lower or 'link' in question_lower):
@@ -595,52 +596,60 @@ class QuizSolver:
595
 
596
  # 3. Git commands extraction (e.g., "git add ..." and "git commit ...")
597
  if 'git' in question_lower and ('command' in question_lower or 'stage' in question_lower or 'commit' in question_lower):
598
- # Look for git commands
599
  git_commands = []
600
 
601
- # Pattern for "git add ..." - be more specific
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
602
  git_add_patterns = [
603
- r'git\s+add\s+([^\s\n<>"]+)', # "git add env.sample"
604
- r'(git\s+add\s+[^\n<>"]+)', # Full command
605
  ]
606
  for pattern in git_add_patterns:
607
  git_add_match = re.search(pattern, combined, re.IGNORECASE)
608
  if git_add_match:
609
  cmd = git_add_match.group(1).strip()
610
- # If it's just the file, prepend "git add "
611
- if not cmd.startswith('git'):
612
- cmd = f"git add {cmd}"
613
  if cmd not in git_commands:
614
  git_commands.append(cmd)
615
  break
616
 
617
- # Pattern for "git commit ..." - look for commit with message
618
  git_commit_patterns = [
619
- r'git\s+commit\s+-m\s+"([^"]+)"', # "git commit -m "message""
620
- r'git\s+commit\s+-m\s+([^\s\n<>"]+)', # "git commit -m message"
621
- r'(git\s+commit\s+[^\n<>"]+)', # Full command
622
  ]
623
  for pattern in git_commit_patterns:
624
  git_commit_match = re.search(pattern, combined, re.IGNORECASE)
625
  if git_commit_match:
626
  cmd = git_commit_match.group(1).strip()
627
- # If it's just the message, construct full command
628
- if not cmd.startswith('git'):
629
- # Check if message is in quotes
630
- if '"' in cmd or "'" in cmd:
631
- cmd = f'git commit -m {cmd}'
632
- else:
633
- cmd = f'git commit -m "{cmd}"'
634
  if cmd not in git_commands:
635
  git_commands.append(cmd)
636
  break
637
 
638
- # Also check error responses like "Need git add ... then git commit ..."
639
- need_pattern = r'[Nn]eed\s+(git\s+add\s+[^\s]+)\s+then\s+(git\s+commit\s+[^\n<>"]+)'
640
- need_match = re.search(need_pattern, combined, re.IGNORECASE)
641
- if need_match:
642
- git_commands = [need_match.group(1).strip(), need_match.group(2).strip()]
643
-
644
  # If we found git commands, return them
645
  if git_commands:
646
  # If question asks for "two commands", return them separated by newline
@@ -649,7 +658,7 @@ class QuizSolver:
649
  logger.info(f"Extracted git commands: {result}")
650
  return result
651
  # Otherwise return the first one
652
- else:
653
  logger.info(f"Extracted git command: {git_commands[0]}")
654
  return git_commands[0]
655
 
 
518
 
519
  # 1. Command string extraction (e.g., "uv http get ...")
520
  if 'command string' in question_lower or 'craft the command' in question_lower:
521
+ # First, check error responses which often contain the exact command format
522
+ # Pattern: "Submit the command string: uv http get ..."
523
+ submit_command_pattern = r'[Ss]ubmit\s+the\s+command\s+string[:\s]+(uv\s+http\s+get\s+[^\n<>"]+(?:\s+-H\s+"[^"]+")?)'
524
+ match = re.search(submit_command_pattern, combined, re.IGNORECASE)
525
+ if match:
526
+ command = match.group(1).strip()
527
+ command = ' '.join(command.split())
528
+ # Substitute <your email> or <email> with actual email if provided
529
+ if email:
530
+ command = command.replace('<your email>', email)
531
+ command = command.replace('<email>', email)
532
+ logger.info(f"Extracted command from instruction: {command[:100]}...")
533
+ return command
534
+
535
+ # Look for command patterns in the page
536
+ # Pattern: "uv http get https://..." - need to capture full URL and optional header
537
+ # More specific pattern that captures the full command
538
  command_patterns = [
539
+ r'(uv\s+http\s+get\s+https?://[^\s<>"]+(?:\?[^\s<>"]+)?(?:\s+-H\s+"[^"]+")?)', # Full URL with query params and header
540
+ r'(uv\s+http\s+get\s+https?://[^\s<>"]+)', # Just URL
541
  r'(curl\s+[^\n<>"]+)',
542
  r'(wget\s+[^\n<>"]+)',
 
543
  ]
544
  for pattern in command_patterns:
545
  match = re.search(pattern, combined, re.IGNORECASE)
 
549
  command = ' '.join(command.split())
550
  # Stop at certain delimiters that indicate end of command
551
  # Remove anything after common sentence endings that aren't part of command
552
+ command = re.sub(r'\s+(?:Submit|Do not|Note|Remember|Important|\.\s+[A-Z]).*$', '', command, flags=re.IGNORECASE)
553
  # Substitute <your email> or <email> with actual email if provided
554
  if email:
555
  command = command.replace('<your email>', email)
556
  command = command.replace('<email>', email)
557
+ # Ensure we have a complete command (should have URL)
558
+ if 'http' in command.lower() and len(command) > 20: # Reasonable minimum length
559
+ logger.info(f"Extracted command string: {command[:100]}...")
560
+ return command
 
 
 
 
 
 
 
 
 
 
 
 
 
 
561
 
562
  # 2. Exact path extraction (e.g., "/project2/data-preparation.md")
563
  if 'exact' in question_lower and ('path' in question_lower or 'string' in question_lower or 'link' in question_lower):
 
596
 
597
  # 3. Git commands extraction (e.g., "git add ..." and "git commit ...")
598
  if 'git' in question_lower and ('command' in question_lower or 'stage' in question_lower or 'commit' in question_lower):
 
599
  git_commands = []
600
 
601
+ # First, check error responses which often contain the exact format
602
+ # Pattern: "Need git add ... then git commit ..."
603
+ need_pattern = r'[Nn]eed\s+(git\s+add\s+[^\s]+)\s+then\s+(git\s+commit\s+[^\n<>"]+)'
604
+ need_match = re.search(need_pattern, combined, re.IGNORECASE)
605
+ if need_match:
606
+ cmd1 = need_match.group(1).strip()
607
+ cmd2 = need_match.group(2).strip()
608
+ # Ensure cmd2 has the message in quotes if needed
609
+ if '-m' in cmd2 and '"' not in cmd2 and "'" not in cmd2:
610
+ # Extract message and add quotes
611
+ msg_match = re.search(r'-m\s+([^\s]+)', cmd2)
612
+ if msg_match:
613
+ msg = msg_match.group(1)
614
+ cmd2 = cmd2.replace(msg, f'"{msg}"')
615
+ git_commands = [cmd1, cmd2]
616
+ result = '\n'.join(git_commands)
617
+ logger.info(f"Extracted git commands from error response: {result}")
618
+ return result
619
+
620
+ # Look for git commands in the page
621
+ # Pattern for "git add env.sample"
622
  git_add_patterns = [
623
+ r'(git\s+add\s+env\.sample)', # Specific file
624
+ r'(git\s+add\s+[^\s\n<>"]+)', # General
625
  ]
626
  for pattern in git_add_patterns:
627
  git_add_match = re.search(pattern, combined, re.IGNORECASE)
628
  if git_add_match:
629
  cmd = git_add_match.group(1).strip()
 
 
 
630
  if cmd not in git_commands:
631
  git_commands.append(cmd)
632
  break
633
 
634
+ # Pattern for "git commit -m "chore: keep env sample""
635
  git_commit_patterns = [
636
+ r'(git\s+commit\s+-m\s+"[^"]+")', # With quotes
637
+ r'(git\s+commit\s+-m\s+[^\s\n<>"]+)', # Without quotes (will add them)
 
638
  ]
639
  for pattern in git_commit_patterns:
640
  git_commit_match = re.search(pattern, combined, re.IGNORECASE)
641
  if git_commit_match:
642
  cmd = git_commit_match.group(1).strip()
643
+ # If message doesn't have quotes, add them
644
+ if '-m' in cmd and '"' not in cmd and "'" not in cmd:
645
+ msg_match = re.search(r'-m\s+([^\s]+)', cmd)
646
+ if msg_match:
647
+ msg = msg_match.group(1)
648
+ cmd = cmd.replace(msg, f'"{msg}"')
 
649
  if cmd not in git_commands:
650
  git_commands.append(cmd)
651
  break
652
 
 
 
 
 
 
 
653
  # If we found git commands, return them
654
  if git_commands:
655
  # If question asks for "two commands", return them separated by newline
 
658
  logger.info(f"Extracted git commands: {result}")
659
  return result
660
  # Otherwise return the first one
661
+ elif git_commands:
662
  logger.info(f"Extracted git command: {git_commands[0]}")
663
  return git_commands[0]
664
 
app/utils.py CHANGED
@@ -74,19 +74,37 @@ def extract_submit_url(text: str, base_url: str) -> Optional[str]:
74
  continue
75
 
76
  # Try to find relative submit links (e.g. href="/submit")
 
77
  rel_patterns = [
78
- r'href=["\\\'](/[^"\\\']*submit[^"\\\']*)["\\\']',
79
- r'(/[^\\s"<>\']*submit[^\\s"<>\']*)',
80
  r'POST\s+to\s+JSON\s+to\s+(/[^\s<>"\'\)]+)', # "POST to JSON to /submit"
81
  r'[Pp]ost\s+(?:to|at):\s+(/[^\s<>"\'\)]+)', # "POST to: /submit"
82
  ]
83
  for pattern in rel_patterns:
84
  matches = re.findall(pattern, text, re.IGNORECASE)
85
  if matches:
86
- candidate = matches[0].strip().rstrip('.,;:!?)}]{["\'')
87
- joined = urljoin(base_url, candidate)
88
- logger.info(f"Found relative submit URL: {joined}")
89
- return joined
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  # Try to find submit URL in the base domain with /submit path
92
  if base_url:
 
74
  continue
75
 
76
  # Try to find relative submit links (e.g. href="/submit")
77
+ # Be more strict - only match actual submit endpoints, not paths that happen to contain "submit" in text
78
  rel_patterns = [
79
+ r'href=["\\\'](/[^"\\\']*submit[^"\\\']*)["\\\']', # href="/submit" or href="/api/submit"
 
80
  r'POST\s+to\s+JSON\s+to\s+(/[^\s<>"\'\)]+)', # "POST to JSON to /submit"
81
  r'[Pp]ost\s+(?:to|at):\s+(/[^\s<>"\'\)]+)', # "POST to: /submit"
82
  ]
83
  for pattern in rel_patterns:
84
  matches = re.findall(pattern, text, re.IGNORECASE)
85
  if matches:
86
+ candidate = matches[0].strip()
87
+ # Clean up - stop at first space, parenthesis, or other non-URL character
88
+ # This prevents matching text like "/path (description).Submit"
89
+ candidate = re.sub(r'[\s\(\)].*$', '', candidate) # Remove everything after space or paren
90
+ candidate = candidate.rstrip('.,;:!?)}]{["\'')
91
+ # Validate it's actually a submit endpoint (contains "submit" in the path)
92
+ # AND it doesn't contain file extensions that indicate it's a document path
93
+ if 'submit' in candidate.lower() and not any(ext in candidate.lower() for ext in ['.md', '.txt', '.pdf', '.html']):
94
+ try:
95
+ joined = urljoin(base_url, candidate)
96
+ # Final validation - ensure it's a valid URL
97
+ parsed = urlparse(joined)
98
+ if parsed.scheme and parsed.netloc:
99
+ logger.info(f"Found relative submit URL: {joined}")
100
+ return joined
101
+ except Exception as e:
102
+ logger.warning(f"Invalid relative URL candidate: {candidate}, error: {e}")
103
+ continue
104
+
105
+ # Don't match generic paths that happen to contain "submit" in surrounding text
106
+ # This was causing issues where paths like "/project2/data-preparation.md (local copy provided).Submit that exact"
107
+ # were being matched incorrectly
108
 
109
  # Try to find submit URL in the base domain with /submit path
110
  if base_url: