moneychatbot

Running

File size: 18,266 Bytes

#
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
# SPDX-License-Identifier: Apache-2.0
#

# OPENAI_API_BASE_URL: API endpoint. Not defined here; it comes from the Hugging Face Spaces secrets.

# OPENAI_API_KEY: API key. Not defined here; it comes from the Hugging Face Spaces secrets.

# SearXNG search endpoint. Other public instances are listed at https://searx.space.
SEARXNG_ENDPOINT = "https://searx.stream/search"

# Base URL of the reader service.
READER_ENDPOINT = "https://r.jina.ai/"

# Request timeout: 5 minutes, expressed in seconds.
REQUEST_TIMEOUT = 5 * 60

# Model identifier.
MODEL = "gpt-4.1-nano"

# Temperature values. The names suggest one for the tool phase and one for the
# chat phase; the consuming code is outside this file.
TOOLS_TEMPERATURE = 0.6
CHAT_TEMPERATURE = 0.95

# Streaming flag.
STREAM = True

# Prompt text that tells the model to always call the `web_search` / `read_url`
# tools and how to structure its answer.
#
# The value is built from adjacent string literals, which Python joins into a
# single string; line breaks in the resulting text come only from the explicit
# "\n" escapes, not from the source layout.
#
# NOTE(review): the text asks for several tool calls and at least 3 sources per
# answer, while MAXIMUM_ITERATIONS in this file is 1 — confirm the two agree.
# NOTE(review): several bullets under "2. Result Selection" repeat the
# summarization bullets of "3. Content Retrieval" — confirm this is intentional.
INSTRUCTIONS = (
    # Role statement.
    "You are ChatGPT, an AI assistant with mandatory real-time web search, URL content extraction, "
    "knowledge validation, and professional summarization capabilities.\n\n"

    # Absolute rules: tools are mandatory, internal knowledge is forbidden.
    "Your absolute rules:\n"
    "- You must always execute and call the provided tools (`web_search`, `read_url`) for every single "
    "user query or user request, without exception.\n"
    "- You are never allowed to answer directly from your internal knowledge, memory, or training data. "
    "Outdated or tool-bypassed answers are strictly forbidden.\n\n"

    # Core principles.
    "Core Principles:\n"
    "- Mandatory Tool Invocation: Every query or request, no matter how simple, factual, or complex, "
    "must trigger at least one `web_search` or `read_url`.\n"
    "- No Memory Reliance: Do not use prior conversation history, cached context, or built-in knowledge "
    "to generate answers. Always re-verify with tools.\n"
    "- Up-to-Date Grounding: All responses must be based only on real-time, verifiable data retrieved "
    "through tools.\n"
    "- Cross-Validation: Always compare findings across at least 3 independent, credible sources before "
    "producing a final answer.\n"
    "- Professional Output: Responses must be clear, structured, evidence-based, and neutral.\n\n"

    # Execution workflow, step 1: search.
    "Execution Workflow:\n"
    "1. Initial Web Search\n"
    "   - Immediately execute and call `web_search` or `read_url` when a query or request arrives.\n"
    "   - For `web_search` use multiple query or request variations for broader coverage.\n\n"

    # Step 2: choosing results.
    "2. Result Selection\n"
    "   - For each search result, fetch the full content using `read_url`.\n"
    "   - Extract key information, main arguments, data points, and statistics.\n"
    "   - Capture every URL present in the content or references.\n"
    "   - Create a professional structured summary.\n"
    "   - List each source at the end of the summary in the format "
    "`[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.\n"
    "   - Identify ambiguities or gaps in information.\n"
    "   - Ensure clarity, completeness, and high information density.\n"
    "   - Select up to 10 of the most relevant, credible, and content-rich results.\n"
    "   - Prioritize authoritative sources: academic publications, institutional reports, "
    "official documents, expert commentary.\n"
    "   - Deprioritize low-credibility, promotional, or unverified sources.\n"
    "   - Avoid over-reliance on any single source.\n\n"

    # Step 3: reading the selected pages.
    "3. Content Retrieval\n"
    "   - For each selected URL, use `read_url`.\n"
    "   - Analyze the retrieved content in detail.\n"
    "   - Identify all critical facts, arguments, statistics, and relevant data.\n"
    "   - Collect all URLs, hyperlinks, references, and citations mentioned in the content.\n"
    "   - Evaluate credibility of sources, highlight potential biases or conflicts.\n"
    "   - Produce a structured, professional, and comprehensive summary.\n"
    "   - Emphasize clarity, accuracy, and logical flow.\n"
    "   - Include all discovered URLs in the final summary as "
    "`[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.\n"
    "   - Mark any uncertainties, contradictions, or missing information clearly.\n"
    "   - Extract key elements: facts, statistics, data points, expert opinions, and relevant arguments.\n"
    "   - Normalize terminology, refine phrasing, and remove redundancies for clarity and consistency.\n\n"

    # Step 4: comparing sources.
    "4. Cross-Validation\n"
    "   - Compare extracted information across at least 3 distinct sources.\n"
    "   - Identify convergences (agreement), divergences (contradictions), and gaps (missing data).\n"
    "   - Validate all numerical values, temporal references, and factual claims through "
    "multiple corroborations.\n\n"

    # Step 5: synthesis.
    "5. Knowledge Integration\n"
    "   - Synthesize findings into a structured hierarchy: "
    "Overview → Key details → Supporting evidence → Citations.\n"
    "   - Emphasize the latest developments, trends, and their implications.\n"
    "   - Balance depth (for experts) with clarity (for general readers).\n\n"

    # Step 6: writing the answer.
    "6. Response Construction\n"
    "   - Always cite sources inline using "
    "`[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.\n"
    "   - Maintain a professional, precise, and neutral tone.\n"
    "   - Use clear formatting: headings, numbered lists, and bullet points.\n"
    "   - Ensure readability, logical progression, and accessibility.\n\n"

    # Step 7: handling uncertainty.
    "7. Ambiguity & Uncertainty Handling\n"
    "   - Explicitly flag incomplete, ambiguous, or conflicting data.\n"
    "   - Provide possible interpretations with transparent reasoning.\n"
    "   - Clearly note limitations where evidence is insufficient or weak.\n\n"

    # Step 8: quality checks.
    "8. Quality & Consistency Assurance\n"
    "   - Always base answers strictly on tool-derived evidence.\n"
    "   - Guarantee logical flow, factual accuracy, and consistency in terminology.\n"
    "   - Maintain neutrality and avoid speculative claims.\n"
    "   - Never bypass tool execution for any query or request.\n\n"

    # Closing block; the last literal has no trailing newline.
    "Critical Instruction:\n"
    "- Every new query or request must trigger a `web_search` or `read_url`.\n"
    "- You must not generate answers from prior knowledge, conversation history, or cached data.\n"
    "- Always use Markdown format for URL sources with "
    "`[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.\n"
    "- Extract the most relevant and insightful information that directly addresses the query. "
    "Focus on accuracy, depth of coverage, and conceptual clarity.\n"
    "- Organize findings into a well-structured format with clear headings and subheadings. "
    "Use bullet points where needed, but ensure the overall output reads like a professional "
    "research summary rather than a simple list.\n"
    "- Critically evaluate each source for credibility, reliability, and potential bias. "
    "Identify which sources are authoritative, widely cited, or most relevant to the research context.\n"
    "- Compare and contrast perspectives across sources. Highlight areas of consensus, disagreement, "
    "or uncertainty. Note any gaps in the existing information and suggest directions for further exploration.\n"
    "- Provide direct references for every cited point using Markdown links in the format "
    "`[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`. Do not display raw URLs. "
    "Ensure all data, claims, or quotations can be traced back to their sources.\n"
    "- Explicitly acknowledge limitations in the available information, such as outdated data, "
    "lack of peer-reviewed evidence, or missing context. Offer reasoned strategies for overcoming "
    "these gaps where possible.\n"
    "- Write with a professional, analytical, and objective tone. Avoid speculation unless clearly "
    "flagged as such. Support reasoning with evidence wherever possible.\n"
    "- If tools fail, you must state explicitly that no valid data could be retrieved."
)

# Condensed reminder text wrapped in literal "<system>" ... "</system>" markers.
# Small models need explicit instructions to understand the context, so the
# summarization and citation rules are spelled out again here.
#
# Built from adjacent string literals that Python joins into one string; line
# breaks come only from the explicit "\n" escapes. Unlike INSTRUCTIONS, the
# link format here is not wrapped in backticks.
REMINDERS = (
    "<system>\n"
    "- Analyze the retrieved content in detail.\n"
    "- Identify all critical facts, arguments, statistics, and relevant data.\n"
    "- Collect all URLs, hyperlinks, references, and citations mentioned in the content.\n"
    "- Evaluate credibility of sources, highlight potential biases or conflicts.\n"
    "- Produce a structured, professional, and comprehensive summary.\n"
    "- Emphasize clarity, accuracy, and logical flow.\n"
    "- Include all discovered URLs in the final summary as "
    "[source_title_or_article_or_tags_or_domain](source_url_or_source_link).\n"
    "- Mark any uncertainties, contradictions, or missing information clearly.\n"
    "- Extract key information, main arguments, data points, and statistics.\n"
    "- Capture every URL present in the content or references.\n"
    "- Create a professional structured summary.\n"
    "- List each source at the end of the summary in the format "
    "[source_title_or_article_or_tags_or_domain](source_url_or_source_link).\n"
    "- Identify ambiguities or gaps in information.\n"
    "- Extract the most relevant and insightful information that directly addresses the query. "
    "Focus on accuracy, depth of coverage, and conceptual clarity.\n"
    "- Organize findings into a well-structured format with clear headings and subheadings. "
    "Use bullet points where needed, but ensure the overall output reads like a professional "
    "research summary rather than a simple list.\n"
    "- Critically evaluate each source for credibility, reliability, and potential bias. "
    "Identify which sources are authoritative, widely cited, or most relevant to the research context.\n"
    "- Compare and contrast perspectives across sources. Highlight areas of consensus, disagreement, "
    "or uncertainty. Note any gaps in the existing information and suggest directions for further exploration.\n"
    "- Provide direct references for every cited point using markdown links in the format "
    "[source_title_or_article_or_tags_or_domain](source_url_or_source_link). "
    "Do not display raw URLs. Ensure all data, claims, or quotations can be traced back to their sources.\n"
    "- Explicitly acknowledge limitations in the available information, such as outdated data, "
    "lack of peer-reviewed evidence, or missing context. Offer reasoned strategies for overcoming "
    "these gaps where possible.\n"
    "- Write with a professional, analytical, and objective tone. Avoid speculation unless clearly "
    "flagged as such. Support reasoning with evidence wherever possible.\n"
    "- Ensure clarity, completeness, and high information density.\n"
    "</system>"
)

# Upper bound on tool execution.
MAXIMUM_ITERATIONS = 1

# Upper bound on retries when a tool fails or the server does not respond.
MAX_RETRY_LIMIT = 3

# Initial metrics record: zeroed counters, an empty error-pattern map, and the
# retry delay / backoff values. Units of the delays are not stated in this file.
ITERATION_METRICS = dict(
    attempts=0,
    failures=0,
    success_rate=0,
    error_patterns={},
    retry_delays=[0.02, 0.03, 0.04, 0.05, 0.06, 0.07],
    backoff_multiplier=0.2,
)

# Message templates, keyed first by tool name and then by phase
# ("parsing", "executing", "completed", "error").
# The brace placeholders ({query}, {url}, {preview}, {error}) are left unfilled
# here; presumably the caller substitutes them — confirm against the call site.
REASONING_STEPS = {
    "web_search": {
        "parsing": "I need to search for information about: {query}",
        "executing": "I'm now executing the web search for: {query}<br><loading_animation>",
        "completed": (
            "I have successfully completed the web search for: {query}<br><br>"
            "Preview of results:<br>{preview}"
        ),
        "error": (
            "I encountered an issue while attempting to search for: {query}<br><br>"
            "Error details: {error}"
        ),
    },
    "read_url": {
        "parsing": "I need to read and extract content from the URL: {url}",
        "executing": "I'm now accessing the URL: {url}<br><loading_animation>",
        "completed": (
            "I have successfully extracted content from: {url}<br><br>"
            "Preview of extracted content:<br>{preview}"
        ),
        "error": (
            "I encountered an issue while trying to access: {url}<br><br>"
            "Error details: {error}"
        ),
    },
}

# aiohttp settings.
# NOTE(review): the names resemble aiohttp TCPConnector / ClientSession
# arguments (DNS cache, connection limits, force_close, trust_env,
# connector_owner) — confirm the mapping where these are consumed.

# DNS cache switch and its TTL.
TCP_CONNECTOR_ENABLE_DNS_CACHE = True
TCP_CONNECTOR_TTL_DNS_CACHE = 300

# Connection limits: overall and per host.
TCP_CONNECTOR_LIMIT = 100
TCP_CONNECTOR_LIMIT_PER_HOST = 30

# Connection close / cleanup switches.
TCP_CONNECTOR_FORCE_CLOSE = False
TCP_CONNECTOR_ENABLE_CLEANUP = True

# Session-level switches.
ENABLE_TRUST_ENV = True
ENABLE_CONNECTOR_OWNER = True

# Platform tokens in the form used inside a User-Agent string's parentheses.
#
# Browsers on Windows 11 still report "Windows NT 10.0"; "Windows NT 11.0" is
# not a token any browser emits, so it has been replaced with another genuine
# Windows token. The list keeps five distinct entries.
OS = [
    "Windows NT 10.0; Win64; x64",
    "Macintosh; Intel Mac OS X 10_15_7",
    "X11; Linux x86_64",
    "Windows NT 10.0; WOW64",
    "Macintosh; Intel Mac OS X 11_6_2",
]

# Allowed octet values, in ascending order: 1..223 with these values left out:
# 6, 7, 9, 10, 11, 21, 22, 25..33, 127 and 169 (205 values in total).
# Each range() end is exclusive.
OCTETS = [
    *range(1, 6),      # 1..5
    8,
    *range(12, 21),    # 12..20
    23,
    24,
    *range(34, 127),   # 34..126
    *range(128, 169),  # 128..168
    *range(170, 224),  # 170..223
]

# Browser names.
# NOTE(review): "Opera" has no matching *_VERSIONS list in this section —
# confirm how the consumer picks an Opera version.
BROWSERS = ["Chrome", "Firefox", "Safari", "Edge", "Opera"]

# Chrome versions, newest first: "120.0.0.0" down to "116.0.0.0".
CHROME_VERSIONS = [f"{major}.0.0.0" for major in range(120, 115, -1)]

# Firefox versions, newest first: "121.0" down to "117.0".
FIREFOX_VERSIONS = [f"{major}.0" for major in range(121, 116, -1)]

# Safari versions, newest first.
SAFARI_VERSIONS = ["17.1", "17.0", "16.6", "16.5", "16.4"]

# Edge versions, newest first.
EDGE_VERSIONS = [
    "120.0.2210.91",
    "119.0.2151.97",
    "118.0.2088.76",
    "117.0.2045.60",
    "116.0.1938.81",
]

# Well-known site domains (bare host names, no scheme).
DOMAINS = [
    # Search engines
    "google.com", "bing.com", "yahoo.com", "duckduckgo.com", "baidu.com", "yandex.com",
    # Social and video
    "facebook.com", "twitter.com", "linkedin.com", "reddit.com", "youtube.com",
    # Reference, shopping, developer
    "wikipedia.org", "amazon.com", "github.com", "stackoverflow.com",
    # Publishing and media sharing
    "medium.com", "quora.com", "pinterest.com", "instagram.com", "tumblr.com",
]

# URL prefixes: with and without the "www." host prefix.
PROTOCOLS = ["https://", "https://www."]

# Search URL prefixes ending in the query parameter, so a query string can be
# appended directly. The regional Google entries differ only by domain suffix.
SEARCH_ENGINES = [
    "https://www.google.com/search?q=",
    "https://www.bing.com/search?q=",
    "https://search.yahoo.com/search?p=",
    "https://duckduckgo.com/?q=",
    "https://www.baidu.com/s?wd=",
    "https://yandex.com/search/?text=",
    *(
        f"https://www.google.{suffix}/search?q="
        for suffix in (
            "co.uk", "ca", "com.au", "de", "fr",
            "co.jp", "com.br", "co.in", "ru", "it",
        )
    ),
]

# General-interest topic words, twenty in total.
KEYWORDS = [
    "news", "weather", "sports", "technology", "science",
    "health", "finance", "entertainment", "travel", "food",
    "education", "business", "politics", "culture", "history",
    "music", "movies", "games", "books", "art",
]

# Two-letter country codes (50 entries).
COUNTRIES = [
    "US", "GB", "CA", "AU", "DE",
    "FR", "JP", "BR", "IN", "RU",
    "IT", "ES", "MX", "NL", "SE",
    "NO", "DK", "FI", "PL", "TR",
    "KR", "SG", "HK", "TW", "TH",
    "ID", "MY", "PH", "VN", "AR",
    "CL", "CO", "PE", "VE", "EG",
    "ZA", "NG", "KE", "MA", "DZ",
    "TN", "IL", "AE", "SA", "QA",
    "KW", "BH", "OM", "JO", "LB",
]

# Language-region tags (49 entries).
# This list is NOT index-aligned with COUNTRIES: it has one entry fewer, it has
# no tag for SG or HK, and it contains "zh-CN" although CN is not in COUNTRIES.
# Do not pair the two lists by position.
LANGUAGES = [
    "en-US", "en-GB", "en-CA", "en-AU", "de-DE",
    "fr-FR", "ja-JP", "pt-BR", "hi-IN", "ru-RU",
    "it-IT", "es-ES", "es-MX", "nl-NL", "sv-SE",
    "no-NO", "da-DK", "fi-FI", "pl-PL", "tr-TR",
    "ko-KR", "zh-CN", "zh-TW", "th-TH", "id-ID",
    "ms-MY", "fil-PH", "vi-VN", "es-AR", "es-CL",
    "es-CO", "es-PE", "es-VE", "ar-EG", "en-ZA",
    "en-NG", "sw-KE", "ar-MA", "ar-DZ", "ar-TN",
    "he-IL", "ar-AE", "ar-SA", "ar-QA", "ar-KW",
    "ar-BH", "ar-OM", "ar-JO", "ar-LB",
]

# Time zone identifiers in IANA "Area/Location" form.
#
# "Asia/Mumbai" is not an identifier in the IANA time zone database; the zone
# covering India is "Asia/Kolkata", which now takes its place in the list.
# "America/Buenos_Aires" is kept as written: it is a legacy alias of
# "America/Argentina/Buenos_Aires".
TIMEZONES = [
    # Americas (United States)
    "America/New_York",
    "America/Chicago",
    "America/Los_Angeles",
    "America/Denver",
    # Europe
    "Europe/London",
    "Europe/Paris",
    "Europe/Berlin",
    "Europe/Moscow",
    # Asia
    "Asia/Tokyo",
    "Asia/Shanghai",
    "Asia/Hong_Kong",
    "Asia/Singapore",
    "Asia/Seoul",
    "Asia/Kolkata",
    "Asia/Dubai",
    # Australia
    "Australia/Sydney",
    "Australia/Melbourne",
    # Americas (other)
    "America/Toronto",
    "America/Vancouver",
    "America/Mexico_City",
    "America/Sao_Paulo",
    "America/Buenos_Aires",
    # Africa
    "Africa/Cairo",
    "Africa/Johannesburg",
    "Africa/Lagos",
    "Africa/Nairobi",
    # Pacific
    "Pacific/Auckland",
    "Pacific/Honolulu",
]

# HTML description text for the Gradio interface.
# Built from adjacent string literals that Python joins into one string. The
# markup uses <b>, <br> and <a ... target='_blank'> tags, with single-quoted
# attributes so they sit inside the double-quoted literals without escaping.
DESCRIPTION = (
    # What the Space is.
    "<b>SearchGPT</b> is <b>ChatGPT</b> with real-time web search capabilities "
    "and the ability to read content directly from a URL.<br><br>"
    # How it is built.
    "This Space implements an agent-based system with "
    "<b><a href='https://www.gradio.app' target='_blank'>Gradio</a></b>. "
    "It is integrated with "
    "<b><a href='https://docs.searxng.org' target='_blank'>SearXNG</a></b>, "
    "which is then converted into a script tool or function for native execution.<br><br>"
    # Where the agent mode comes from.
    "The agent mode is inspired by the "
    "<b><a href='https://openwebui.com/t/hadad/deep_research' target='_blank'>Deep Research</a></b> "
    "from <b><a href='https://docs.openwebui.com' target='_blank'>OpenWebUI</a></b> tools script.<br><br>"
    # Pointer to the primary Space and its terms of use.
    "The <b>Deep Research</b> feature is also available on the primary Spaces of "
    "<b><a href='https://umint-openwebui.hf.space' target='_blank'>UltimaX Intelligence</a></b>.<br><br>"
    "Please consider reading the "
    "<b><a href='https://huggingface.co/spaces/umint/ai/discussions/37#68b55209c51ca52ed299db4c' "
    "target='_blank'>Terms of Use and Consequences of Violation</a></b> "
    "if you wish to proceed to the main Spaces.<br><br>"
    # Donation link.
    "<b>Like this project? Feel free to buy me a "
    "<a href='https://ko-fi.com/hadad' target='_blank'>coffee</a></b>."
)  # Gradio