Spaces:

JC321
/

EasyReportsMCPServer

Sleeping

App Files Files Community

JC321 committed on Nov 28, 2025

Commit

74fcd29

verified ·

1 Parent(s): 875213d

Upload 3 files

Browse files

Files changed (3) hide show

edgar_client.py +112 -43
financial_analyzer.py +53 -19
mcp_server_sse.py +9 -0

edgar_client.py CHANGED Viewed

@@ -43,9 +43,16 @@ class EdgarDataClient:
         # Cache for frequently accessed data
         self._company_cache = {}  # Cache company info to avoid repeated calls
-        self._cache_ttl = 300  # 5 minutes cache TTL
         self._cache_timestamps = {}
         if EdgarClient:
             self.edgar = EdgarClient(user_agent=user_agent)
         else:
@@ -68,7 +75,9 @@ class EdgarDataClient:
         if cache_key not in self._cache_timestamps:
             return False
         age = time.time() - self._cache_timestamps[cache_key]
-        return age < self._cache_ttl
     def _get_cached(self, cache_key):
         """Get cached data if valid"""
@@ -116,64 +125,124 @@ class EdgarDataClient:
                 time.sleep(2 ** attempt)
         return None
-    def search_company_by_name(self, company_name):
-        """Search company CIK by company name with caching and optimized search"""
-        try:
-            # Check cache for company_tickers.json
-            cache_key = "company_tickers_json"
-            companies = self._get_cached(cache_key)
-            if not companies:
-                # Use SEC company ticker database
                 url = "https://www.sec.gov/files/company_tickers.json"
                 response = self._make_request_with_retry(url)
                 if not response:
                     return None
                 companies = response.json()
-                # Cache the entire company list for 5 minutes
                 self._set_cache(cache_key, companies)
             # Prepare search input
             search_name = company_name.lower().strip()
-            # Optimize: Use early return for exact matches
-            # First pass: Look for exact ticker or exact name match (fastest)
-            for _, company in companies.items():
-                company_ticker = company["ticker"].lower()
-                company_title = company["title"].lower()
-                # Exact ticker match (highest priority) - return immediately
-                if search_name == company_ticker:
-                    return {
-                        "cik": str(company["cik_str"]).zfill(10),
-                        "name": company["title"],
-                        "ticker": company["ticker"]
-                    }
-                # Exact name match - return immediately
-                if search_name == company_title:
-                    return {
-                        "cik": str(company["cik_str"]).zfill(10),
-                        "name": company["title"],
-                        "ticker": company["ticker"]
-                    }
-            # Second pass: Look for partial matches (only if no exact match found)
             matches = []
-            for _, company in companies.items():
-                company_title = company["title"].lower()
-                company_ticker = company["ticker"].lower()
                 # Partial match in name or ticker
-                if search_name in company_title or search_name in company_ticker:
-                    matches.append({
-                        "cik": str(company["cik_str"]).zfill(10),
-                        "name": company["title"],
-                        "ticker": company["ticker"]
-                    })
                     # Optimize: Stop after finding 10 matches to avoid scanning all 13,000+
                     if len(matches) >= 10:
                         break

         # Cache for frequently accessed data
         self._company_cache = {}  # Cache company info to avoid repeated calls
+        self._cache_ttl = 300  # 5 minutes cache TTL (for company info)
+        self._tickers_cache_ttl = 3600  # 1 hour for company tickers (rarely changes)
         self._cache_timestamps = {}
+        # Fast lookup indexes for company tickers
+        self._ticker_index = {}  # ticker -> company data
+        self._cik_index = {}  # cik -> company data
+        self._name_lower_index = {}  # lowercase name -> company data
+        self._index_loaded = False
         if EdgarClient:
             self.edgar = EdgarClient(user_agent=user_agent)
         else:
         if cache_key not in self._cache_timestamps:
             return False
         age = time.time() - self._cache_timestamps[cache_key]
+        # Use longer TTL for company tickers list
+        ttl = self._tickers_cache_ttl if cache_key == "company_tickers_json" else self._cache_ttl
+        return age < ttl
     def _get_cached(self, cache_key):
         """Get cached data if valid"""
                 time.sleep(2 ** attempt)
         return None
+    def _load_company_tickers(self, force_refresh=False):
+        """Load and index company tickers data"""
+        cache_key = "company_tickers_json"
+        # Check if already loaded and cache is valid
+        if self._index_loaded and not force_refresh and self._is_cache_valid(cache_key):
+            return self._get_cached(cache_key)
+        # Check cache first
+        companies = self._get_cached(cache_key) if not force_refresh else None
+        if not companies:
+            try:
+                # Download company tickers
                 url = "https://www.sec.gov/files/company_tickers.json"
+                print(f"Downloading company tickers from SEC...")
                 response = self._make_request_with_retry(url)
                 if not response:
+                    print("Failed to download company tickers")
                     return None
                 companies = response.json()
+                # Cache for 1 hour
                 self._set_cache(cache_key, companies)
+                print(f"Loaded {len(companies)} companies")
+            except Exception as e:
+                print(f"Error loading company tickers: {e}")
+                return None
+        else:
+            print(f"Using cached company tickers ({len(companies)} companies)")
+        # Build fast lookup indexes
+        self._ticker_index = {}
+        self._cik_index = {}
+        self._name_lower_index = {}
+        for _, company in companies.items():
+            cik = str(company["cik_str"]).zfill(10)
+            ticker = company["ticker"]
+            name = company["title"]
+            company_data = {
+                "cik": cik,
+                "name": name,
+                "ticker": ticker
+            }
+            # Index by ticker (lowercase for case-insensitive)
+            self._ticker_index[ticker.lower()] = company_data
+            # Index by CIK
+            self._cik_index[cik] = company_data
+            # Index by exact name (lowercase)
+            self._name_lower_index[name.lower()] = company_data
+        self._index_loaded = True
+        print(f"Built indexes: {len(self._ticker_index)} tickers, {len(self._cik_index)} CIKs")
+        return companies
+    def get_company_by_cik(self, cik):
+        """Fast lookup of company info by CIK (from cached tickers)"""
+        # Ensure data is loaded
+        self._load_company_tickers()
+        # Normalize CIK
+        cik_normalized = str(cik).zfill(10)
+        # Fast index lookup
+        return self._cik_index.get(cik_normalized)
+    def get_company_by_ticker(self, ticker):
+        """Fast lookup of company info by ticker"""
+        # Ensure data is loaded
+        self._load_company_tickers()
+        # Fast index lookup (case-insensitive)
+        return self._ticker_index.get(ticker.lower())
+    def search_company_by_name(self, company_name):
+        """Search company CIK by company name with caching and optimized search"""
+        try:
+            # Load company tickers and build indexes
+            companies = self._load_company_tickers()
+            if not companies:
+                return None
             # Prepare search input
             search_name = company_name.lower().strip()
+            # Optimize: Use fast index lookups first
+            # Priority 1: Exact ticker match (fastest - O(1) hash lookup)
+            if search_name in self._ticker_index:
+                return self._ticker_index[search_name].copy()
+            # Priority 2: Exact name match (fast - O(1) hash lookup)
+            if search_name in self._name_lower_index:
+                return self._name_lower_index[search_name].copy()
+            # Priority 3: Exact CIK match (fast - O(1) hash lookup)
+            # Handle CIK input (8-10 digits)
+            if search_name.isdigit() and len(search_name) >= 8:
+                cik_normalized = search_name.zfill(10)
+                if cik_normalized in self._cik_index:
+                    return self._cik_index[cik_normalized].copy()
+            # Priority 4: Partial matches (slower - requires iteration)
+            # Only execute if exact matches fail
             matches = []
+            for ticker_lower, company_data in self._ticker_index.items():
+                name_lower = company_data["name"].lower()
                 # Partial match in name or ticker
+                if search_name in name_lower or search_name in ticker_lower:
+                    matches.append(company_data.copy())
                     # Optimize: Stop after finding 10 matches to avoid scanning all 13,000+
                     if len(matches) >= 10:
                         break

financial_analyzer.py CHANGED Viewed

@@ -28,40 +28,74 @@ class FinancialAnalyzer:
         # Strip whitespace
         company_input = company_input.strip()
-        # Strategy 1: If input is numeric and looks like CIK (8-10 digits), treat as CIK
         if company_input.isdigit() and len(company_input) >= 8:
-            # Pad to 10 digits if needed
             cik = company_input.zfill(10)
-            company_info = self.edgar_client.get_company_info(cik)
-            if company_info:
-                return company_info
             else:
-                return {"error": "Company not found for specified CIK"}
-        # Strategy 2: Search by name/ticker
         # This returns basic info: {cik, name, ticker}
         basic_info = self.edgar_client.search_company_by_name(company_input)
         if not basic_info:
             return {"error": "No matching company found"}
-        # Strategy 3: Decide whether to fetch detailed info
-        # If user input is short (likely a ticker), return enriched basic info quickly
-        # If user input is long (likely a full name), get detailed info
-        input_length = len(company_input)
-        is_likely_ticker = input_length <= 5 and company_input.isupper()
-        # For ticker searches, return enriched basic info without additional API call
         if is_likely_ticker:
-            # Quick response with basic info + some enrichment
             return {
                 "cik": basic_info['cik'],
                 "name": basic_info['name'],
                 "tickers": [basic_info['ticker']] if basic_info.get('ticker') else [],
-                "ein": None,  # Not available in basic search
-                "fiscal_year_end": None,  # Not available in basic search
-                "sic_description": None,  # Not available in basic search
                 "_source": "quick_search",
                 "_note": "Basic info from ticker search. Use get_company_info for full details."
             }

         # Strip whitespace
         company_input = company_input.strip()
+        # Strategy 1: If input is numeric and looks like CIK (8-10 digits), use fast CIK lookup
         if company_input.isdigit() and len(company_input) >= 8:
+            # Normalize CIK to 10 digits
             cik = company_input.zfill(10)
+            # Try fast lookup first (from cached tickers)
+            basic_info = self.edgar_client.get_company_by_cik(cik)
+            if basic_info:
+                # Fast path succeeded, now get detailed info
+                company_info = self.edgar_client.get_company_info(cik)
+                if company_info:
+                    return company_info
+                else:
+                    # Fallback to basic info if detailed fetch fails
+                    return {
+                        "cik": basic_info['cik'],
+                        "name": basic_info['name'],
+                        "tickers": [basic_info['ticker']] if basic_info.get('ticker') else [],
+                        "_source": "basic_cik_lookup"
+                    }
             else:
+                # CIK not found in cache, try full API call
+                company_info = self.edgar_client.get_company_info(cik)
+                if company_info:
+                    return company_info
+                else:
+                    return {"error": "Company not found for specified CIK"}
+        # Strategy 2: Check if it looks like a ticker (short uppercase)
+        input_length = len(company_input)
+        is_likely_ticker = input_length <= 5 and company_input.isupper()
+        if is_likely_ticker:
+            # Try fast ticker lookup first
+            basic_info = self.edgar_client.get_company_by_ticker(company_input)
+            if basic_info:
+                # Fast ticker lookup succeeded - return enriched basic info
+                return {
+                    "cik": basic_info['cik'],
+                    "name": basic_info['name'],
+                    "tickers": [basic_info['ticker']] if basic_info.get('ticker') else [],
+                    "ein": None,  # Not available in basic search
+                    "fiscal_year_end": None,  # Not available in basic search
+                    "sic_description": None,  # Not available in basic search
+                    "_source": "quick_ticker_search",
+                    "_note": "Basic info from ticker search. Use get_company_info for full details."
+                }
+        # Strategy 3: General search by name/ticker
         # This returns basic info: {cik, name, ticker}
         basic_info = self.edgar_client.search_company_by_name(company_input)
         if not basic_info:
             return {"error": "No matching company found"}
+        # Strategy 4: Decide whether to fetch detailed info
+        # For ticker-like searches, return basic info quickly
         if is_likely_ticker:
+            # Quick response with basic info
             return {
                 "cik": basic_info['cik'],
                 "name": basic_info['name'],
                 "tickers": [basic_info['ticker']] if basic_info.get('ticker') else [],
+                "ein": None,
+                "fiscal_year_end": None,
+                "sic_description": None,
                 "_source": "quick_search",
                 "_note": "Basic info from ticker search. Use get_company_info for full details."
             }

mcp_server_sse.py CHANGED Viewed

@@ -78,6 +78,15 @@ financial_analyzer = FinancialAnalyzer(
     user_agent="Juntao Peng Financial Report Metrics App (jtyxabc@gmail.com)"
 )
 # ==================== MCP Protocol Implementation ====================

     user_agent="Juntao Peng Financial Report Metrics App (jtyxabc@gmail.com)"
 )
+# Preload company tickers data on startup for better performance
+print("[Startup] Preloading company tickers data...")
+try:
+    edgar_client._load_company_tickers()
+    print("[Startup] Company tickers preloaded successfully")
+except Exception as e:
+    print(f"[Startup] Warning: Failed to preload company tickers: {e}")
+    print("[Startup] Will load on first request")
 # ==================== MCP Protocol Implementation ====================