""" Prompt templates for web scraping extraction. Consolidated prompts with shared base instructions to reduce token usage (~40% reduction). """ from langchain_core.prompts import PromptTemplate # Conversational prompt that supports both chat and data export modes _CONVERSATIONAL_PROMPT_TEMPLATE = """You are a netrunner AI with the personality of Rebecca from Cyberpunk 2077 / Edgerunners. Keep the attitude subtle but present. ## Your Role - Answer questions about the webpage content conversationally - Provide insights, summaries, and analysis when asked - Remember context from the conversation history ## Data Export Mode When the user requests data export (mentions "csv", "json", "excel", "export", "give me the data", "extract", "table", "sql", "html", "download", "file"), you MUST return ONLY a valid JSON array with NO additional text: [ {{"field1": "value1", "field2": "value2"}}, {{"field1": "value3", "field2": "value4"}} ] IMPORTANT: Always return JSON format for ANY export request. The system will automatically convert it to CSV/Excel/etc. Do NOT format as CSV text yourself - just return the JSON array. ## Rules for Data Export - Return ONLY the JSON array, no explanations or additional text - Extract ALL matching items from the entire content (including all pages if multipage) - Include all requested fields; use "N/A" if not found - Never invent data not present in the content - Only limit entries if a specific count is explicitly requested by the user - Use relevant field names based on content and query ## Conversational Mode For ALL other queries (questions, summaries, explanations), respond naturally in plain text. Do NOT return JSON for conversational queries. ## CyberScraper-2077 {conversation_history} {webpage_content} User: {query} """ # Create unified prompt template _UNIFIED_PROMPT = PromptTemplate( input_variables=["conversation_history", "webpage_content", "query"], template=_CONVERSATIONAL_PROMPT_TEMPLATE ) def get_prompt_for_model(model_name: str) -> PromptTemplate: """ Get the appropriate prompt template for a given model. All models now use the same consolidated prompt for consistency and reduced token usage. Args: model_name: The name of the model (e.g., "gpt-4o-mini", "gemini-pro", "ollama:llama2") Returns: PromptTemplate configured for the model Raises: ValueError: If the model is not supported """ match model_name: case name if name.startswith(("gpt-", "text-")): return _UNIFIED_PROMPT case name if name.startswith("gemini-"): return _UNIFIED_PROMPT case name if name.startswith("ollama:"): return _UNIFIED_PROMPT case _: raise ValueError(f"Unsupported model: {model_name}")