scicoqa / core /prompt_demo.py
timbmg's picture
initial commit
4caa453 unverified
"""Standalone prompt template loader."""
import logging
import string
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# Embedded "discrepancy_generation" prompt template.
# The text contains two string.Template-style placeholders, ${paper} and
# ${code}. It instructs the reader of the prompt to list paper-code
# discrepancies as a YAML list under the key "discrepancies".
DISCREPANCY_GENERATION_PROMPT = """You are an expert in analyzing scientific papers and their code implementations.
Your task is to carefully identify concrete discrepancies between what is described in a paper and what is actually implemented in the code.
## What counts as a discrepancy
- A concrete paper–code discrepancy means a mismatch between what is stated in the original paper (e.g., formulas, algorithms, logic, methods, processes, or other settings) and what is implemented in the original code repository.
- Each distinct mismatch should be reported as a separate item.
## What does not count as a discrepancy
- Missing information in the paper like hyperparameters (e.g., "the authors did not specify X").
- Hyperparameter mismatches (e.g., learning rate, batch size, dropout rate), since these are typically configurable in code repository.
- Missing implementation in the original code repository (e.g., "the authors did not provide the code for X").
- Bugs or errors in the code that are unrelated to what the paper describes.
## Output format
Provide your findings in the following YAML structure:
```yaml
discrepancies:
- <a summary of the discrepancy between the paper and the code in 3-8 sentences. Your description should contain three parts focusing on the discrepancy: 1) summarize what is described in the paper, 2) summarize what is implemented in the code, and 3) summarize the difference. Do not speculate about the impact.>
- <if there are multiple discrepancies, put each of them in a separate item.>
```
## Paper
${paper}
## Code
${code}
"""
class Prompt:
    """Callable wrapper around a named ``string.Template`` prompt.

    Instances expose:
        template: Name of the selected template.
        prompt_template: Raw template text.
        prompt: ``string.Template`` built from ``prompt_template``.
        prompt_vars: Placeholder names found in the template, in order of
            first appearance.
    """

    def __init__(self, template: str = "discrepancy_generation"):
        """Select a prompt template by name and prepare it for substitution.

        Args:
            template: Template name (currently only "discrepancy_generation"
                is supported).

        Raises:
            ValueError: If ``template`` is not a known template name.
        """
        self.template = template
        if template == "discrepancy_generation":
            self.prompt_template = DISCREPANCY_GENERATION_PROMPT
        else:
            raise ValueError(f"Template '{template}' not found. Available: 'discrepancy_generation'")

        # Template object used for ${var} substitution.
        self.prompt = string.Template(self.prompt_template)
        # Names of the placeholders the template expects.
        self.prompt_vars = self._find_identifiers(self.prompt)

    @staticmethod
    def _find_identifiers(prompt: string.Template) -> list:
        """Return the unique placeholder names of ``prompt`` in first-seen order."""
        try:
            return list(prompt.get_identifiers())
        except AttributeError:
            # Template.get_identifiers() only exists on Python 3.11+; on older
            # interpreters scan the template with its own placeholder pattern.
            names = []
            for match in prompt.pattern.finditer(prompt.template):
                name = match.group("named") or match.group("braced")
                if name is not None and name not in names:
                    names.append(name)
            return names

    def __call__(self, **kwargs) -> str:
        """Substitute variables in the prompt template.

        String values have every '<|endoftext|>' marker replaced by the plain
        word 'endoftext' before substitution. Placeholders without a matching
        keyword are left untouched in the output, and a warning is logged so
        an incomplete prompt does not go unnoticed. Keywords that do not
        match any placeholder are ignored.

        Args:
            **kwargs: Variables to substitute (e.g., paper, code).

        Returns:
            Formatted prompt string.
        """
        values = {
            name: value.replace("<|endoftext|>", "endoftext") if isinstance(value, str) else value
            for name, value in kwargs.items()
        }

        missing = [name for name in self.prompt_vars if name not in values]
        if missing:
            # safe_substitute keeps going; surface the gap instead of hiding it.
            logger.warning(
                "Prompt template '%s' has unfilled variables: %s",
                self.template,
                ", ".join(missing),
            )

        return self.prompt.safe_substitute(values)