Spaces:

GraziePrego
/

scrapling

Paused

App Files Files Community

scrapling / src /prompts.py

GraziePrego

Upload original Scraper_hub repo as-is

eb37804 verified 2 months ago

raw

history blame contribute delete

2.81 kB

	"""
	Prompt templates for web scraping extraction.

	Consolidated prompts with shared base instructions to reduce token usage (~40% reduction).
	"""

	from langchain_core.prompts import PromptTemplate

	# Conversational prompt that supports both chat and data export modes.
	# The text is used as the `template` of the PromptTemplate built below, whose
	# input variables are {conversation_history}, {webpage_content} and {query}.
	# The braces in the JSON example are doubled ({{ }}) so they are escaped and
	# not parsed as template placeholders.
	_CONVERSATIONAL_PROMPT_TEMPLATE = """You are a netrunner AI with the personality of Rebecca from Cyberpunk 2077 / Edgerunners. Keep the attitude subtle but present.

	## Your Role
	- Answer questions about the webpage content conversationally
	- Provide insights, summaries, and analysis when asked
	- Remember context from the conversation history

	## Data Export Mode
	When the user requests data export (mentions "csv", "json", "excel", "export", "give me the data", "extract", "table", "sql", "html", "download", "file"), you MUST return ONLY a valid JSON array with NO additional text:

	[
	{{"field1": "value1", "field2": "value2"}},
	{{"field1": "value3", "field2": "value4"}}
	]

	IMPORTANT: Always return JSON format for ANY export request. The system will automatically convert it to CSV/Excel/etc. Do NOT format as CSV text yourself - just return the JSON array.

	## Rules for Data Export
	- Return ONLY the JSON array, no explanations or additional text
	- Extract ALL matching items from the entire content (including all pages if multipage)
	- Include all requested fields; use "N/A" if not found
	- Never invent data not present in the content
	- Only limit entries if a specific count is explicitly requested by the user
	- Use relevant field names based on content and query

	## Conversational Mode
	For ALL other queries (questions, summaries, explanations), respond naturally in plain text. Do NOT return JSON for conversational queries.

	## CyberScraper-2077
	{conversation_history}

	{webpage_content}

	User: {query}
	"""

	# Single shared PromptTemplate instance, built once at import time from the
	# conversational template and its three placeholders.
	_UNIFIED_PROMPT = PromptTemplate(
	    template=_CONVERSATIONAL_PROMPT_TEMPLATE,
	    input_variables=["conversation_history", "webpage_content", "query"],
	)


	# Model-name prefixes that are served by the unified prompt.
	_SUPPORTED_MODEL_PREFIXES = ("gpt-", "text-", "gemini-", "ollama:")


	def get_prompt_for_model(model_name: str) -> PromptTemplate:
	    """
	    Get the prompt template for a given model.

	    Every supported model shares the same consolidated prompt, so the model
	    name is only used to reject unsupported models.

	    Args:
	        model_name: The name of the model (e.g., "gpt-4o-mini", "gemini-pro",
	            "ollama:llama2"). Matched by case-sensitive prefix.

	    Returns:
	        The shared PromptTemplate instance.

	    Raises:
	        ValueError: If the model is not supported, including when
	            model_name is not a string.
	    """
	    # str.startswith accepts a tuple of prefixes, so one check replaces the
	    # three match arms that all returned the same template. The isinstance
	    # guard turns a non-string name into the documented ValueError instead
	    # of an AttributeError.
	    if isinstance(model_name, str) and model_name.startswith(_SUPPORTED_MODEL_PREFIXES):
	        return _UNIFIED_PROMPT
	    raise ValueError(f"Unsupported model: {model_name}")