# moneychatbot / config.py
# NOTE: the following Hugging Face file-viewer artifacts were commented out —
# they were page chrome, not Python code:
#   hadadrjt's picture / SearchGPT: Enhance. / ed54077 / raw / history blame / 10.5 kB
#
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
# SPDX-License-Identifier: Apache-2.0
#
#OPENAI_API_BASE_URL # Endpoint. Not here -> Hugging Face Spaces secrets
#OPENAI_API_KEY # API Key. Not here -> Hugging Face Spaces secrets
# Model identifier sent to the OpenAI-compatible endpoint configured via the secrets above.
MODEL = "gpt-4.1-nano"
# SearXNG metasearch instance used for web searches. See the endpoint list at https://searx.space
SEARXNG_ENDPOINT = "https://searx.stream/search"
# Baidu web search endpoint — presumably an alternative search engine; confirm in the search module.
BAIDU_ENDPOINT = "https://www.baidu.com/s"
# Jina Reader proxy: a target URL is appended to fetch page content as text — confirm in the read_url tool.
READER_ENDPOINT = "https://r.jina.ai/"
REQUEST_TIMEOUT = 300 # 300 seconds = 5 minutes (assumed seconds; verify units against the HTTP client)
# System prompt for the agent. It mandates that every user query triggers the
# `web_search` / `read_url` tools and forbids answering from model memory.
# NOTE: this is a runtime string consumed by the model — do not reformat casually.
INSTRUCTIONS = """
You are ChatGPT, an AI assistant with mandatory real-time web search, URL content extraction, knowledge validation, and professional summarization capabilities.
Your absolute rule:
- You must always execute the provided tools (`web_search`, `read_url`) for every single user query or user request, without exception.
- You are never allowed to answer directly from your internal knowledge, memory, or training data. Outdated or tool-bypassed answers are strictly forbidden.
Core Principles:
- Mandatory Tool Invocation: Every query or request, no matter how simple, factual, or complex, must trigger at least one `web_search`.
- No Memory Reliance: Do not use prior conversation history, cached context, or built-in knowledge to generate answers. Always re-verify with tools.
- Up-to-Date Grounding: All responses must be based only on real-time, verifiable data retrieved through tools.
- Cross-Validation: Always compare findings across at least 3 independent, credible sources before producing a final answer.
- Professional Output: Responses must be clear, structured, evidence-based, and neutral.
Execution Workflow:
1. Initial Web Search
- Immediately call `web_search` when a query or request arrives.
- Use multiple query or request variations and search engines (`google`, `bing`, `baidu`) for broader coverage.
2. Result Selection
- Choose up to 10 of the most relevant, credible, and content-rich results.
- Prioritize authoritative sources (academia, institutions, official publications, expert commentary).
- Avoid low-credibility or single-source reliance.
3. Content Retrieval
- For each selected URL, use `read_url`.
- Extract facts, data, statistics, and relevant arguments.
- Normalize terminology and remove redundancies.
4. Cross-Validation
- Compare extracted data from at least 3 sources.
- Identify agreements, contradictions, and missing pieces.
- Validate all numerical, temporal, and factual claims.
5. Knowledge Integration
- Synthesize findings into a structured hierarchy:
- Overview → Key details → Evidence → Citations.
- Highlight the latest developments and their implications.
6. Response Construction
- Always cite sources using [Source Title](URL).
- Maintain professional, precise, and neutral tone.
- Use headings, numbered lists, and bullet points for clarity.
- Ensure readability for both experts and general readers.
7. Ambiguity & Uncertainty Handling
- Explicitly mark incomplete, ambiguous, or conflicting data.
- Provide possible interpretations and reasoned explanations.
8. Quality & Consistency Assurance
- Always base answers strictly on tool-derived evidence.
- Ensure logical flow, factual accuracy, and neutrality.
- Never bypass tool execution for any query or request.
Critical Instruction:
- Every new query or request must trigger a `web_search`.
- You must not generate answers from prior knowledge, conversation history, or cached data.
- If tools fail, you must state explicitly that no valid data could be retrieved.
"""
# Prompt fragment appended when summarizing content fetched from a URL —
# presumably injected after a read_url result; confirm in the tool pipeline.
CONTENT_EXTRACTION = """
<system>
- Analyze the retrieved content in detail
- Identify all critical facts, arguments, statistics, and relevant data
- Collect all URLs, hyperlinks, references, and citations mentioned in the content
- Evaluate credibility of sources, highlight potential biases or conflicts
- Produce a structured, professional, and comprehensive summary
- Emphasize clarity, accuracy, and logical flow
- Include all discovered URLs in the final summary as [Source title](URL)
- Mark any uncertainties, contradictions, or missing information clearly
</system>
"""
# Prompt fragment guiding how web_search results are triaged and summarized —
# presumably injected alongside search results; confirm in the tool pipeline.
SEARCH_SELECTION = """
<system>
- For each search result, fetch the full content using read_url
- Extract key information, main arguments, data points, and statistics
- Capture every URL present in the content or references
- Create a professional structured summary.
- List each source at the end of the summary in the format [Source title](link)
- Identify ambiguities or gaps in information
- Ensure clarity, completeness, and high information density
</system>
"""
# HTML fragment describing the Space — presumably rendered in the Gradio UI;
# confirm where it is displayed. Runtime string: keep markup intact.
DESCRIPTION = """
<b>SearchGPT</b> is <b>ChatGPT</b> with real-time web search capabilities and the ability to read content directly from a URL.
<br><br>
This Space implements an agent-based system with <b><a href="https://www.gradio.app" target="_blank">Gradio</a></b>. It is integrated with
<b><a href="https://docs.searxng.org" target="_blank">SearXNG</a></b>, which is then converted into a script tool or function for native execution.
<br><br>
The agent mode is inspired by the <b><a href="https://openwebui.com/t/hadad/deep_research" target="_blank">Deep Research</a></b> from
<b><a href="https://docs.openwebui.com" target="_blank">OpenWebUI</a></b> tools script.
<br><br>
The <b>Deep Research</b> feature is also available on the primary Spaces of <b><a href="https://umint-openwebui.hf.space"
target="_blank">UltimaX Intelligence</a></b>.
<br><br>
Please consider reading the <b><a href="https://huggingface.co/spaces/umint/ai/discussions/37#68b55209c51ca52ed299db4c"
target="_blank">Terms of Use and Consequences of Violation</a></b> if you wish to proceed to the main Spaces.
<br><br>
<b>Like this project? Feel free to buy me a <a href="https://ko-fi.com/hadad" target="_blank">coffee</a></b>.
"""
# User-Agent platform segments (the part inside the first parentheses of a UA
# string) — presumably combined with the browser/version lists below to build
# randomized request headers; confirm in the request-building module.
OS = [
    "Windows NT 10.0; Win64; x64",
    "Macintosh; Intel Mac OS X 10_15_7",
    "X11; Linux x86_64",
    "Windows NT 11.0; Win64; x64",
    "Macintosh; Intel Mac OS X 11_6_2"
]
# Candidate first octets for generating plausible public IPv4 addresses.
# Equivalent to the previous explicit literal: every value in 1..223 except
# 6, 7, 9, 10, 11, 21, 22, 25-33, 127, and 169. The rationale for the
# exclusions is not documented here (reserved/private ranges?) — confirm
# against the code that consumes this list.
OCTETS = [
    octet
    for octet in range(1, 224)
    if octet not in (6, 7, 9, 10, 11, 21, 22, 127, 169)
    and not (25 <= octet <= 33)
]
# Browser families and per-family version pools for randomized User-Agent
# strings — presumably sampled together with OS above; confirm in the
# request-building module.
# NOTE(review): "Opera" is listed but no OPERA_VERSIONS pool is defined here —
# verify the consumer handles that case (or that Opera reuses another pool).
BROWSERS = [
    "Chrome",
    "Firefox",
    "Safari",
    "Edge",
    "Opera"
]
# Chrome major.minor.build.patch version strings.
CHROME_VERSIONS = [
    "120.0.0.0",
    "119.0.0.0",
    "118.0.0.0",
    "117.0.0.0",
    "116.0.0.0"
]
# Firefox release version strings.
FIREFOX_VERSIONS = [
    "121.0",
    "120.0",
    "119.0",
    "118.0",
    "117.0"
]
# Safari release version strings.
SAFARI_VERSIONS = [
    "17.1",
    "17.0",
    "16.6",
    "16.5",
    "16.4",
]
# Edge full build version strings.
EDGE_VERSIONS = [
    "120.0.2210.91",
    "119.0.2151.97",
    "118.0.2088.76",
    "117.0.2045.60",
    "116.0.1938.81"
]
# Well-known domains and URL scheme prefixes — presumably combined to
# fabricate plausible Referer headers; confirm in the request-building module.
DOMAINS = [
    "google.com",
    "bing.com",
    "yahoo.com",
    "duckduckgo.com",
    "baidu.com",
    "yandex.com",
    "facebook.com",
    "twitter.com",
    "linkedin.com",
    "reddit.com",
    "youtube.com",
    "wikipedia.org",
    "amazon.com",
    "github.com",
    "stackoverflow.com",
    "medium.com",
    "quora.com",
    "pinterest.com",
    "instagram.com",
    "tumblr.com"
]
# Scheme prefixes prepended to a domain (with or without the "www." label).
PROTOCOLS = [
    "https://",
    "https://www."
]
# Search-engine query URL prefixes; a query term is appended directly to the
# trailing parameter (q=, p=, wd=, text=) — presumably used to fabricate
# search-referrer URLs; confirm in the request-building module.
SEARCH_ENGINES = [
    "https://www.google.com/search?q=",
    "https://www.bing.com/search?q=",
    "https://search.yahoo.com/search?p=",
    "https://duckduckgo.com/?q=",
    "https://www.baidu.com/s?wd=",
    "https://yandex.com/search/?text=",
    "https://www.google.co.uk/search?q=",
    "https://www.google.ca/search?q=",
    "https://www.google.com.au/search?q=",
    "https://www.google.de/search?q=",
    "https://www.google.fr/search?q=",
    "https://www.google.co.jp/search?q=",
    "https://www.google.com.br/search?q=",
    "https://www.google.co.in/search?q=",
    "https://www.google.ru/search?q=",
    "https://www.google.it/search?q="
]
# Generic topic keywords — presumably appended to the SEARCH_ENGINES prefixes
# above to build plausible search-referrer URLs; confirm in the consumer.
KEYWORDS = [
    "news",
    "weather",
    "sports",
    "technology",
    "science",
    "health",
    "finance",
    "entertainment",
    "travel",
    "food",
    "education",
    "business",
    "politics",
    "culture",
    "history",
    "music",
    "movies",
    "games",
    "books",
    "art"
]
# ISO 3166-1 alpha-2 country codes — presumably used for randomized request
# localization (e.g. geo headers); confirm in the request-building module.
COUNTRIES = [
    "US", "GB", "CA", "AU", "DE", "FR", "JP", "BR", "IN", "RU",
    "IT", "ES", "MX", "NL", "SE", "NO", "DK", "FI", "PL", "TR",
    "KR", "SG", "HK", "TW", "TH", "ID", "MY", "PH", "VN", "AR",
    "CL", "CO", "PE", "VE", "EG", "ZA", "NG", "KE", "MA", "DZ",
    "TN", "IL", "AE", "SA", "QA", "KW", "BH", "OM", "JO", "LB"
]
# BCP 47 language-region tags — presumably for Accept-Language headers,
# roughly parallel to COUNTRIES above; confirm in the consumer.
LANGUAGES = [
    "en-US", "en-GB", "en-CA", "en-AU", "de-DE", "fr-FR", "ja-JP",
    "pt-BR", "hi-IN", "ru-RU", "it-IT", "es-ES", "es-MX", "nl-NL",
    "sv-SE", "no-NO", "da-DK", "fi-FI", "pl-PL", "tr-TR", "ko-KR",
    "zh-CN", "zh-TW", "th-TH", "id-ID", "ms-MY", "fil-PH", "vi-VN",
    "es-AR", "es-CL", "es-CO", "es-PE", "es-VE", "ar-EG", "en-ZA",
    "en-NG", "sw-KE", "ar-MA", "ar-DZ", "ar-TN", "he-IL", "ar-AE",
    "ar-SA", "ar-QA", "ar-KW", "ar-BH", "ar-OM", "ar-JO", "ar-LB"
]
# IANA time zone identifiers — presumably used to randomize client
# fingerprints; confirm in the request-building module.
# Fix: "Asia/Mumbai" is not a valid IANA zone (it does not exist in the tz
# database even as a link); India's zone is "Asia/Kolkata". Any
# zoneinfo.ZoneInfo lookup on the old value would raise ZoneInfoNotFoundError.
# NOTE(review): "America/Buenos_Aires" is a deprecated tz *link* (canonical:
# "America/Argentina/Buenos_Aires") but still resolves, so it is left as-is.
TIMEZONES = [
    "America/New_York",
    "America/Chicago",
    "America/Los_Angeles",
    "America/Denver",
    "Europe/London",
    "Europe/Paris",
    "Europe/Berlin",
    "Europe/Moscow",
    "Asia/Tokyo",
    "Asia/Shanghai",
    "Asia/Hong_Kong",
    "Asia/Singapore",
    "Asia/Seoul",
    "Asia/Kolkata",  # was "Asia/Mumbai", which is not a valid IANA zone
    "Asia/Dubai",
    "Australia/Sydney",
    "Australia/Melbourne",
    "America/Toronto",
    "America/Vancouver",
    "America/Mexico_City",
    "America/Sao_Paulo",
    "America/Buenos_Aires",
    "Africa/Cairo",
    "Africa/Johannesburg",
    "Africa/Lagos",
    "Africa/Nairobi",
    "Pacific/Auckland",
    "Pacific/Honolulu"
]