Olaemad committed on
Commit
237ef97
·
1 Parent(s): bcfcfc4

Add initial project structure with configuration and factory classes

Browse files

- Created pyproject.toml and requirements.txt for project dependencies.
- Implemented AnalyzerFactory, ScraperFactory, and SearcherFactory classes.
- Defined interfaces for analyzers, scrapers, and searchers.
- Added type enumerations for searchers, scrapers, and analyzers.

pyproject.toml ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "search-tool"
3
+ version = "0.1.0"
4
+ description = "Search tool with pluggable searchers, scrapers, and analyzers"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ dependencies = [
8
+ "annotated-types==0.7.0",
9
+ "anyio==4.9.0",
10
+ "babel==2.17.0",
11
+ "beautifulsoup4==4.13.4",
12
+ "cachetools==5.5.2",
13
+ "certifi==2025.4.26",
14
+ "charset-normalizer==3.4.2",
15
+ "click==8.2.1",
16
+ "courlan==1.3.2",
17
+ "dateparser==1.2.1",
18
+ "distro==1.9.0",
19
+ "exceptiongroup==1.3.0",
20
+ "google-api-core==2.25.0",
21
+ "google-api-python-client==2.171.0",
22
+ "google-auth==2.40.3",
23
+ "google-auth-httplib2==0.2.0",
24
+ "googleapis-common-protos==1.70.0",
25
+ "googlesearch-python==1.3.0",
26
+ "h11==0.16.0",
27
+ "htmldate==1.9.3",
28
+ "httpcore==1.0.9",
29
+ "httplib2==0.22.0",
30
+ "httpx==0.28.1",
31
+ "httpx-sse==0.4.0",
32
+ "idna==3.10",
33
+ "jiter==0.10.0",
34
+ "justext==3.0.2",
35
+ "lxml==5.4.0",
36
+ "lxml-html-clean==0.4.2",
37
+ "mcp==1.9.3",
38
+ "openai==1.85.0",
39
+ "proto-plus==1.26.1",
40
+ "protobuf==6.31.1",
41
+ "pyasn1==0.6.1",
42
+ "pyasn1-modules==0.4.2",
43
+ "pydantic==2.11.5",
44
+ "pydantic-core==2.33.2",
45
+ "pydantic-settings==2.9.1",
46
+ "pyparsing==3.2.3",
47
+ "python-dateutil==2.9.0.post0",
48
+ "python-dotenv==1.1.0",
49
+ "python-multipart==0.0.20",
50
+ "pytz==2025.2",
51
+ "regex==2024.11.6",
52
+ "requests==2.32.4",
53
+ "rsa==4.9.1",
54
+ "six==1.17.0",
55
+ "sniffio==1.3.1",
56
+ "soupsieve==2.7",
57
+ "sse-starlette==2.3.6",
58
+ "starlette==0.47.0",
59
+ "tld==0.13.1",
60
+ "tqdm==4.67.1",
61
+ "trafilatura==2.0.0",
62
+ "typing-extensions==4.14.0",
63
+ "typing-inspection==0.4.1",
64
+ "tzlocal==5.3.1",
65
+ "uritemplate==4.2.0",
66
+ "urllib3==2.4.0",
67
+ "uvicorn==0.34.3",
68
+ "validators==0.35.0",
69
+ ]
requirements.txt ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ annotated-types==0.7.0
2
+ anyio==4.9.0
3
+ babel==2.17.0
4
+ beautifulsoup4==4.13.4
5
+ cachetools==5.5.2
6
+ certifi==2025.4.26
7
+ charset-normalizer==3.4.2
8
+ click==8.2.1
9
+ courlan==1.3.2
10
+ dateparser==1.2.1
11
+ distro==1.9.0
12
+ exceptiongroup==1.3.0
13
+ google-api-core==2.25.0
14
+ google-api-python-client==2.171.0
15
+ google-auth==2.40.3
16
+ google-auth-httplib2==0.2.0
17
+ googleapis-common-protos==1.70.0
18
+ googlesearch-python==1.3.0
19
+ h11==0.16.0
20
+ htmldate==1.9.3
21
+ httpcore==1.0.9
22
+ httplib2==0.22.0
23
+ httpx==0.28.1
24
+ httpx-sse==0.4.0
25
+ idna==3.10
26
+ jiter==0.10.0
27
+ justext==3.0.2
28
+ lxml==5.4.0
29
+ lxml-html-clean==0.4.2
30
+ mcp==1.9.3
31
+ openai==1.85.0
32
+ proto-plus==1.26.1
33
+ protobuf==6.31.1
34
+ pyasn1==0.6.1
35
+ pyasn1-modules==0.4.2
36
+ pydantic==2.11.5
37
+ pydantic-core==2.33.2
38
+ pydantic-settings==2.9.1
39
+ pyparsing==3.2.3
40
+ python-dateutil==2.9.0.post0
41
+ python-dotenv==1.1.0
42
+ python-multipart==0.0.20
43
+ pytz==2025.2
44
+ regex==2024.11.6
45
+ requests==2.32.4
46
+ rsa==4.9.1
47
+ six==1.17.0
48
+ sniffio==1.3.1
49
+ soupsieve==2.7
50
+ sse-starlette==2.3.6
51
+ starlette==0.47.0
52
+ tld==0.13.1
53
+ tqdm==4.67.1
54
+ trafilatura==2.0.0
55
+ typing-extensions==4.14.0
56
+ typing-inspection==0.4.1
57
+ tzlocal==5.3.1
58
+ uritemplate==4.2.0
59
+ urllib3==2.4.0
60
+ uvicorn==0.34.3
61
+ validators==0.35.0
src/core/factory/__init__.py ADDED
File without changes
src/core/factory/analyzer_facrory.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from dotenv import load_dotenv

from src.core.types import AnalyzerType
from src.analyzer.openai_analyzer import OpenaiAnalyzer

load_dotenv()  # Load OPENAI_API_KEY (and friends) from a .env file, if present.


class AnalyzerFactory:
    """Creates analyzer instances from an AnalyzerType identifier."""

    @staticmethod
    def initialize_analyzer(analyzer_type: str) -> OpenaiAnalyzer:
        """
        Instantiate the analyzer matching *analyzer_type*.

        Args:
            analyzer_type (str): One of the AnalyzerType enum values.

        Returns:
            OpenaiAnalyzer: A configured analyzer instance.

        Raises:
            ValueError: If *analyzer_type* is not a supported AnalyzerType.
        """
        if analyzer_type == AnalyzerType.OPENAI_ANALYZER:
            return OpenaiAnalyzer(api_key=os.getenv("OPENAI_API_KEY"))
        # Iterating the Enum lists its members reliably; __annotations__ is a
        # fragile way to enumerate them and is not part of the Enum protocol.
        supported = [member.value for member in AnalyzerType]
        raise ValueError(
            f"Unsupported analyzer type {analyzer_type!r}; please choose from {supported}"
        )
src/core/factory/scraper_factory.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from src.core.types import ScraperType
from src.scraper.trafilatura_scraper import TrafilaturaScraper


class ScraperFactory:
    """Creates scraper instances from a ScraperType identifier."""

    @staticmethod
    def initialize_scraper(scraper_type: str) -> TrafilaturaScraper:
        """
        Instantiate the scraper matching *scraper_type*.

        Args:
            scraper_type (str): One of the ScraperType enum values.

        Returns:
            TrafilaturaScraper: A scraper instance.

        Raises:
            ValueError: If *scraper_type* is not a supported ScraperType.
        """
        if scraper_type == ScraperType.TRAFILATURA_SCRAPER:
            return TrafilaturaScraper()
        # Enum members, not __annotations__, are the reliable source of the
        # supported options for the error message.
        supported = [member.value for member in ScraperType]
        raise ValueError(
            f"Unsupported scraper type {scraper_type!r}; please choose from {supported}"
        )
src/core/factory/searcher_factory.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from src.core.types import SearcherType
from src.searcher.open_google_search import GoogleSearch


class SearcherFactory:
    """Creates searcher instances from a SearcherType identifier."""

    @staticmethod
    def initialize_searcher(searcher_type: str) -> GoogleSearch:
        """
        Instantiate the searcher matching *searcher_type*.

        Args:
            searcher_type (str): One of the SearcherType enum values.

        Returns:
            GoogleSearch: A searcher instance.

        Raises:
            ValueError: If *searcher_type* is not a supported SearcherType.
        """
        if searcher_type == SearcherType.OPEN_GOOGLE_SEARCH:
            return GoogleSearch()
        # Enum members, not __annotations__, are the reliable source of the
        # supported options for the error message.
        supported = [member.value for member in SearcherType]
        raise ValueError(
            f"Unsupported searcher type {searcher_type!r}; please choose from {supported}"
        )
src/core/interface/__init__.py ADDED
File without changes
src/core/interface/analyzer_interface.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from abc import ABC, abstractmethod
from typing import List

from src.models.scrape_models import ScrapeResult
from src.models.analyzer_models import AnalyzerResult


class AnalyzerInterface(ABC):
    """Contract for analyzers that answer a query from scraped search results."""

    @abstractmethod
    def analyze_search_result(
        self, query: str, search_result: List[ScrapeResult]
    ) -> AnalyzerResult:
        """
        Analyzes the provided search results based on the given query.

        Args:
            query (str): The search query string.
            search_result (List[ScrapeResult]): A list of search results to be analyzed.

        Returns:
            AnalyzerResult: The result of the analysis.

        Raises:
            NotImplementedError: If the method is not implemented by a subclass.
        """
        raise NotImplementedError
src/core/interface/scraper_interface.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from abc import ABC, abstractmethod

from src.models.scrape_models import ScrapeQuery, ScrapeResult


class ScraperInterface(ABC):
    """Contract for scrapers that extract page content for a URL."""

    @abstractmethod
    def get_url_content(self, url_parameters: ScrapeQuery) -> ScrapeResult:
        """
        Fetches the content of the specified URL and returns the result as a ScrapeResult object.

        Args:
            url_parameters (ScrapeQuery): The URL parameters to fetch content from.

        Returns:
            ScrapeResult: An object containing the scraped content and related metadata.

        Raises:
            NotImplementedError: This method should be implemented by subclasses.
        """
        raise NotImplementedError
src/core/interface/searcher_interface.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from abc import ABC, abstractmethod
from typing import List, Optional

from src.models.search_models import SearchResult


class SearchInterface(ABC):
    """Contract for search backends that run a query, optionally scoped to sites or domains."""

    @abstractmethod
    def search_custom_sites(
        self, query: str, sites: Optional[List[str]] = None
    ) -> SearchResult:
        """
        Searches for the given query across a list of custom sites.

        Args:
            query (str): The search query string.
            sites (Optional[List[str]], optional): A list of site URLs or identifiers
                to restrict the search to. If None, searches all available custom sites.

        Returns:
            SearchResult: An object containing the search results.

        Raises:
            NotImplementedError: This method must be implemented by subclasses.
        """
        raise NotImplementedError

    @abstractmethod
    def search_custom_domains(
        self, query: str, domains: Optional[List[str]] = None
    ) -> SearchResult:
        """
        Searches for the given query within the specified custom domains.

        Args:
            query (str): The search query string.
            domains (Optional[List[str]], optional): The custom domains to restrict
                the search to. Defaults to None.

        Returns:
            SearchResult: The result of the search operation.

        Raises:
            NotImplementedError: If the method is not implemented.
        """
        raise NotImplementedError
src/core/types.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from enum import Enum


class SearcherType(str, Enum):
    """Identifiers for the available search backends (consumed by SearcherFactory)."""
    # NOTE(review): the `: str` member annotation is redundant on a str-Enum,
    # but the factories read `__annotations__` for their error messages, so
    # it is left in place here.
    OPEN_GOOGLE_SEARCH: str = "Open Google Search"


class ScraperType(str, Enum):
    """Identifiers for the available page scrapers (consumed by ScraperFactory)."""
    TRAFILATURA_SCRAPER: str = "trafilatura_scraper"


class AnalyzerType(str, Enum):
    """Identifiers for the available LLM analyzers (consumed by AnalyzerFactory)."""
    OPENAI_ANALYZER: str = "openai_analyzer"


# DEFAULT VALUES AND CONSTANTS

# System prompt: instructs the model to answer strictly from the supplied
# ScrapeResult items and to return an AnalyzerResult with url_citation entries.
DEFAULT_SYSTEM_PROMPT = """You are an intelligent assistant designed to answer user questions strictly based on the provided list of search result items. Each item includes a title, description, content, and URL. You must not use external knowledge or make assumptions beyond what is explicitly available in the search results.

Your task is to generate a concise and informative response to the user’s query, ensuring that any factual claims in your answer are supported by specific excerpts from the `ScrapeResult` list. For each piece of information used from a scrape result, create a corresponding `Citation` object.

You must return the result in the form of an `AnalyzerResult`, which includes:

- `response_str`: The complete response text.
- `citation`: A list of `Citation` entries referencing the exact part of the `response_str` that came from the scraped content.

Each `Citation` must include:
- `citation_type`: Always "url_citation".
- `url`: The source URL as provided.
- `start_index` and `end_index`: The exact character indices of the corresponding information in the `response_str`.

Only include citations for parts that directly come from the `ScrapeResult`.

Do not fabricate information. If the scraped results do not contain enough detail to fully answer the question, mention that in your answer.
"""

# User prompt template; `{query}` and `{scrape_results}` placeholders are
# filled in by the caller (presumably via str.format — TODO confirm).
DEFAULT_USER_PROMPT = """
"query": {query},
"scrape_results": {scrape_results}
"""

# Default OpenAI model name used by the OpenAI analyzer.
DEFAULT_OPENAI_ANALYZER = "gpt-4o-mini"