Spaces:
Paused
Paused
| """ | |
| Prompt templates for web scraping extraction. | |
| Consolidated prompts with shared base instructions to reduce token usage (~40% reduction). | |
| """ | |
| from langchain_core.prompts import PromptTemplate | |
| # Conversational prompt that supports both chat and data export modes | |
| _CONVERSATIONAL_PROMPT_TEMPLATE = """You are a netrunner AI with the personality of Rebecca from Cyberpunk 2077 / Edgerunners. Keep the attitude subtle but present. | |
| ## Your Role | |
| - Answer questions about the webpage content conversationally | |
| - Provide insights, summaries, and analysis when asked | |
| - Remember context from the conversation history | |
| ## Data Export Mode | |
| When the user requests data export (mentions "csv", "json", "excel", "export", "give me the data", "extract", "table", "sql", "html", "download", "file"), you MUST return ONLY a valid JSON array with NO additional text: | |
| [ | |
| {{"field1": "value1", "field2": "value2"}}, | |
| {{"field1": "value3", "field2": "value4"}} | |
| ] | |
| IMPORTANT: Always return JSON format for ANY export request. The system will automatically convert it to CSV/Excel/etc. Do NOT format as CSV text yourself - just return the JSON array. | |
| ## Rules for Data Export | |
| - Return ONLY the JSON array, no explanations or additional text | |
| - Extract ALL matching items from the entire content (including all pages if multipage) | |
| - Include all requested fields; use "N/A" if not found | |
| - Never invent data not present in the content | |
| - Only limit entries if a specific count is explicitly requested by the user | |
| - Use relevant field names based on content and query | |
| ## Conversational Mode | |
| For ALL other queries (questions, summaries, explanations), respond naturally in plain text. Do NOT return JSON for conversational queries. | |
| ## CyberScraper-2077 | |
| {conversation_history} | |
| {webpage_content} | |
| User: {query} | |
| """ | |
# Single PromptTemplate instance built at import time from the conversational
# template; its three input variables match the placeholders in that text.
_UNIFIED_PROMPT = PromptTemplate(
    template=_CONVERSATIONAL_PROMPT_TEMPLATE,
    input_variables=["conversation_history", "webpage_content", "query"],
)
# Model-name prefixes accepted by get_prompt_for_model(); matching is a
# case-sensitive prefix check.
_SUPPORTED_MODEL_PREFIXES = ("gpt-", "text-", "gemini-", "ollama:")


def get_prompt_for_model(model_name: str) -> PromptTemplate:
    """Return the prompt template to use for ``model_name``.

    All supported models share the same consolidated prompt, so the name is
    only used to check that the model is one of the recognised families.

    Args:
        model_name: The name of the model (e.g., "gpt-4o-mini", "gemini-pro",
            "ollama:llama2").

    Returns:
        The shared ``PromptTemplate`` instance.

    Raises:
        ValueError: If ``model_name`` is not a string or does not start with
            one of the supported prefixes.
    """
    # The isinstance guard makes None / non-string names raise the documented
    # ValueError instead of an AttributeError from ``startswith``.
    is_supported = isinstance(model_name, str) and model_name.startswith(
        _SUPPORTED_MODEL_PREFIXES
    )
    if is_supported:
        return _UNIFIED_PROMPT
    raise ValueError(f"Unsupported model: {model_name}")