Spaces:

OnursFriends
/

StockAnalysisAgent

Running

App Files Files Community

OnurKerimoglu committed on Feb 9, 2025

Commit

61b2df0

1 Parent(s): e25e756

introduced

Browse files

Files changed (2) hide show

requirements.txt +3 -0
src/ticker_finder.py +205 -0

requirements.txt CHANGED Viewed

@@ -20,6 +20,7 @@ executing==2.2.0
 fastapi==0.115.8
 ffmpy==0.5.0
 filelock==3.17.0
 fonttools==4.55.8
 frozendict==2.4.6
 frozenlist==1.5.0
@@ -105,6 +106,7 @@ python-multipart==0.0.20
 pytz==2025.1
 PyYAML==6.0.2
 pyzmq==26.2.1
 regex==2024.11.6
 requests==2.32.3
 requests-toolbelt==1.0.0
@@ -125,6 +127,7 @@ stack-data==0.6.3
 starlette==0.45.3
 sympy==1.13.1
 ta==0.11.0
 tenacity==9.0.0
 threadpoolctl==3.5.0
 tiktoken==0.8.0

 fastapi==0.115.8
 ffmpy==0.5.0
 filelock==3.17.0
+-e git+https://github.com/OnurKerimoglu/financial_agents.git@e25e7566c95beb6678ff63f4c3fb7798e1f6ec27#egg=Financial_Agents
 fonttools==4.55.8
 frozendict==2.4.6
 frozenlist==1.5.0
 pytz==2025.1
 PyYAML==6.0.2
 pyzmq==26.2.1
+RapidFuzz==3.12.1
 regex==2024.11.6
 requests==2.32.3
 requests-toolbelt==1.0.0
 starlette==0.45.3
 sympy==1.13.1
 ta==0.11.0
+tabulate==0.9.0
 tenacity==9.0.0
 threadpoolctl==3.5.0
 tiktoken==0.8.0

src/ticker_finder.py ADDED Viewed

	@@ -0,0 +1,205 @@

+import json
+import logging
+import os
+import pandas as pd
+from rapidfuzz import process, fuzz
+import requests
+from tabulate import tabulate
+class TickerFinder():
+    """
+    A class for finding the best matching ticker for a given company name or ticker.
+    Uses data from https://www.sec.gov/files/company_tickers.json and rapidfuzz package for fuzzy matching.
+    """
+    def __init__(self):
+        """
+        Initialize the TickerFinder object.
+        This method sets the file paths and reads the ticker data into the self.df attribute.
+        """
+        self.logger = logging.getLogger(__name__)
+        self.rootdir = os.path.dirname(os.path.dirname(__file__))
+        self.fname_raw = os.path.join(self.rootdir, 'data_raw', 'sec_gov_company_tickers_test.json')
+        self.fname_compact = os.path.join(self.rootdir, 'data', 'sec_gov_company_tickers_compact.json')
+        self.df = self.read_ticker_data()
+        self.logger.info('Initialized TickerFinder object')
+    def read_ticker_data(
+            self
+            ) -> pd.DataFrame:
+        """
+        Read compact ticker data from a local file.
+        Returns
+        df : pandas DataFrame
+        """
+        # if the compact data is not available, create it
+        if not os.path.exists(self.fname_compact):
+            self.logger.info(f'Compact ticker data was not found at {self.fname_compact}, creating it')
+            self.compact_ticker_data()
+        with open(self.fname_compact, 'r') as f:
+            data = json.load(f)
+        df = pd.DataFrame.from_dict(data, orient='columns')
+        self.logger.info(f'Read compact ticker data from {self.fname_compact}')
+        return df
+    def compact_ticker_data(
+            self
+            ) -> None:
+        """
+        Compact the raw ticker data by extracting only the ticker and title fields and
+        saving them to a local file.
+        If the raw data is not available, this method will download it first.
+        """
+        if not os.path.exists(self.fname_raw):
+            self.logger.info(f'Raw ticker data was not found at {self.fname_raw}, downloading it')
+            self.download_ticker_data()
+        # read the raw data
+        with open(self.fname_raw, 'r') as f:
+            data = json.load(f)
+        # extract the necessary fields
+        titles = [None]*len(data)
+        tickers = [None]*len(data)
+        for k, v in data.items():
+            i = int(k)
+            titles[i] = v['title']
+            tickers[i] = v['ticker']
+        data_compact = {'ticker': tickers, 'title': titles}
+        # save the compact data
+        with open(self.fname_compact, 'w') as f:
+            json.dump(data_compact, f)
+        self.logger.info(f'Compacted raw ticker data into {self.fname_compact}')
+    def download_ticker_data(
+            self
+        ) -> None:
+        """
+        Download the raw ticker data from https://www.sec.gov/files/company_tickers.json
+        using the requests package. The data is saved to a local file.
+        If the download is successful, the raw data is saved as a JSON file.
+        If the download fails, an exception is raised.
+        """
+        url = "https://www.sec.gov/files/company_tickers.json"
+        headers = {
+            "User-Agent": "censored_email_address",
+            "Accept-Encoding": "gzip, deflate",
+            "Host": "www.sec.gov",
+            "Connection": "keep-alive"
+        }
+        response = requests.get(url, headers=headers)
+        if response.status_code != 200:
+            raise Exception(f"Error downloading ticker data from url.\nResponse status code: {response.status_code}")
+        else:
+            df = pd.read_json(response.text)
+        # save the raw data
+        with open(self.fname_raw, 'w') as f:
+            df.to_json(f)
+        self.logger.info(f'Dowloaded raw ticker data into {self.fname_raw}')
+    def find_best_matching_title(
+            self,
+            input_name:str,
+            top_n=3) -> pd.DataFrame:
+        """
+        Find the best matching company title for a given company name.
+        Args:
+        input_name : str
+            The name to search for
+        top_n : int, default=3
+            The number of top matches to return
+        Returns:
+        results : pd.DataFrame
+            A pd.df containing the matched title, ticker, and fuzzy matching score
+        """
+        matches = process.extract(
+            input_name.lower(),
+            self.df["title"].str.lower(),
+            # scorer=fuzz.WRatio,
+            # scorer=fuzz.partial_ratio,
+            scorer=fuzz.ratio,
+            limit=top_n)
+        results = [(self.df.iloc[idx]["title"], self.df.iloc[idx]["ticker"], score) for title, score, idx in matches]
+        df = pd.DataFrame(results, columns=["Title", "Ticker", "Score"])
+        return df
+    def find_best_matching_ticker(
+            self,
+            ticker:str,
+            top_n:int=3) -> pd.DataFrame:
+        """
+        Find the best matching company ticker for a given ticker.
+        Args:
+        ticker : str
+            The ticker to search for
+        top_n : int, default=3
+            The number of top matches to return
+        Returns:
+        results : pd.DataFrame
+            A pd.df containing the title, matched ticker, and fuzzy matching score
+        """
+        matches = process.extract(
+            ticker.upper(),
+            self.df["ticker"],
+            # scorer=fuzz.WRatio,
+            # scorer=fuzz.partial_ratio,
+            scorer=fuzz.ratio,
+            limit=top_n)
+        results = [(self.df.iloc[idx]["title"], self.df.iloc[idx]["ticker"], score) for ticker, score, idx in matches]
+        df = pd.DataFrame(results, columns=["Title", "Ticker", "Score"])
+        return df
+    def find_best_matching_ticker_or_title(
+        self,
+        user_input: str
+        ) -> str:
+        """
+        Find the best matching company ticker for a given user input, which may be a ticker or a title.
+        Args:
+        user_input : str
+            The user input to search for
+        Returns:
+        results : str
+            A string containing the best matching title and ticker
+        """
+        # user may be trying to write a ticker, in which case find the best matching ticker:
+        ticker_matches = self.find_best_matching_ticker(user_input)
+        # user may be trying to write a title, in which case find the best matching title:
+        title_matches = self.find_best_matching_title(user_input)
+        # total matches:
+        c_matches = pd.concat([ticker_matches, title_matches])
+        # deduplicates:
+        c_matches_dedup = c_matches.groupby(['Ticker', 'Title'], as_index=False)['Score'].sum()
+        # sort by score:
+        c_matches_sorted = c_matches_dedup.sort_values(by='Score', ascending=False)
+        # convert results into a pretty string:
+        results = self.df_to_pretty_string(c_matches_sorted )
+        return(results)
+    def df_to_pretty_string(
+            self,
+            df):
+        """
+        Convert a pd.DataFrame into a pretty string, using the tabulate package.
+        Args:
+        df : pd.DataFrame
+            The dataframe to convert
+        Returns:
+        pretty_string : str
+            A string containing the pretty-formatted dataframe
+        """
+        pretty_string = tabulate(df[['Title', 'Ticker']],
+              # headers='keys',
+              tablefmt='plain',
+              showindex=False,
+              numalign='left',
+              stralign='left')
+        return pretty_string
+if __name__ == "__main__":
+      # results = TickerFinder().download_ticker_data()
+      results = TickerFinder().find_best_matching_ticker_or_title("microsoft")
+      print(results)