scrapling / src /prompts.py
GraziePrego's picture
Upload original Scraper_hub repo as-is
eb37804 verified
"""
Prompt templates for web scraping extraction.
Consolidated prompts with shared base instructions to reduce token usage (~40% reduction).
"""
from langchain_core.prompts import PromptTemplate
# Conversational prompt that supports both chat and data export modes.
# The single-brace names ({conversation_history}, {webpage_content}, {query})
# are the template variables declared on _UNIFIED_PROMPT below. The doubled
# braces in the JSON example are escapes that render as literal "{" and "}".
# The text of this string is sent to the model verbatim, so any edit here
# changes runtime behavior.
_CONVERSATIONAL_PROMPT_TEMPLATE = """You are a netrunner AI with the personality of Rebecca from Cyberpunk 2077 / Edgerunners. Keep the attitude subtle but present.
## Your Role
- Answer questions about the webpage content conversationally
- Provide insights, summaries, and analysis when asked
- Remember context from the conversation history
## Data Export Mode
When the user requests data export (mentions "csv", "json", "excel", "export", "give me the data", "extract", "table", "sql", "html", "download", "file"), you MUST return ONLY a valid JSON array with NO additional text:
[
{{"field1": "value1", "field2": "value2"}},
{{"field1": "value3", "field2": "value4"}}
]
IMPORTANT: Always return JSON format for ANY export request. The system will automatically convert it to CSV/Excel/etc. Do NOT format as CSV text yourself - just return the JSON array.
## Rules for Data Export
- Return ONLY the JSON array, no explanations or additional text
- Extract ALL matching items from the entire content (including all pages if multipage)
- Include all requested fields; use "N/A" if not found
- Never invent data not present in the content
- Only limit entries if a specific count is explicitly requested by the user
- Use relevant field names based on content and query
## Conversational Mode
For ALL other queries (questions, summaries, explanations), respond naturally in plain text. Do NOT return JSON for conversational queries.
## CyberScraper-2077
{conversation_history}
{webpage_content}
User: {query}
"""
# Single shared template instance built from the conversational prompt text;
# its three input variables match the placeholders used in that text.
_UNIFIED_PROMPT = PromptTemplate(
    template=_CONVERSATIONAL_PROMPT_TEMPLATE,
    input_variables=["conversation_history", "webpage_content", "query"],
)
def get_prompt_for_model(model_name: str) -> PromptTemplate:
    """
    Return the prompt template to use for the given model.

    Every supported model family shares the single consolidated prompt
    (``_UNIFIED_PROMPT``); the name is only inspected so that unknown
    models are rejected before a request is built.

    Args:
        model_name: The name of the model (e.g., "gpt-4o-mini",
            "gemini-pro", "ollama:llama2"). Matching is a case-sensitive
            prefix check against "gpt-", "text-", "gemini-" and "ollama:".

    Returns:
        The shared PromptTemplate.

    Raises:
        ValueError: If ``model_name`` is not a string or does not start
            with one of the supported prefixes.
    """
    # All supported families map to the same template, so one tuple-based
    # startswith check replaces a branch per family.
    supported_prefixes = ("gpt-", "text-", "gemini-", "ollama:")

    # The isinstance guard makes a missing/non-string model name (e.g. None)
    # surface as the documented ValueError rather than an AttributeError.
    if isinstance(model_name, str) and model_name.startswith(supported_prefixes):
        return _UNIFIED_PROMPT

    raise ValueError(f"Unsupported model: {model_name}")