Spaces:

OnursFriends
/

StockAnalysisAgent

Sleeping

App Files Files Community

OnurKerimoglu committed on Feb 8, 2025

Commit

dfed37d

1 Parent(s): c20857a

Introduced notebooks/ticker_lists_search.ipynb

Browse files

Files changed (1) hide show

notebooks/ticker_lists_search.ipynb +133 -0

notebooks/ticker_lists_search.ipynb ADDED Viewed

	@@ -0,0 +1,133 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas\n",
+    "import json\n",
+    "import os\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initial data prep: flatten the raw company-ticker mapping into two parallel\n",
+    "# lists ('ticker' and 'title') and store them as a compact JSON file.\n",
+    "rootdir = os.path.dirname(os.path.abspath(\"\"))\n",
+    "fname_raw = os.path.join(rootdir, 'data_raw', 'sec_gov_company_tickers.json')\n",
+    "\n",
+    "with open(fname_raw, 'r', encoding='utf-8') as f:\n",
+    "    data = json.load(f)\n",
+    "\n",
+    "# Order the records by their integer key rather than using the key as a list\n",
+    "# position, so a gap or out-of-range key in the raw file cannot raise\n",
+    "# IndexError or leave None placeholders in the output.\n",
+    "records = [v for _, v in sorted(data.items(), key=lambda kv: int(kv[0]))]\n",
+    "titles = [rec['title'] for rec in records]\n",
+    "tickers = [rec['ticker'] for rec in records]\n",
+    "data_compact = {'ticker': tickers, 'title': titles}\n",
+    "\n",
+    "fname_compact = os.path.join(rootdir, 'data', 'sec_gov_company_tickers_compact.json')\n",
+    "# Create the output folder if needed so the write below works on a fresh checkout.\n",
+    "os.makedirs(os.path.dirname(fname_compact), exist_ok=True)\n",
+    "with open(fname_compact, 'w', encoding='utf-8') as f:\n",
+    "    json.dump(data_compact, f)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Reload the compact file written by the previous cell and preview it as a table.\n",
+    "with open(fname_compact, 'r') as f:\n",
+    "    data = json.load(f)\n",
+    "\n",
+    "df = pandas.DataFrame(data)\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from rapidfuzz import process, fuzz\n",
+    "\n",
+    "def read_ticker_data():\n",
+    "    \"\"\"Load the compact ticker/title JSON file into a DataFrame.\n",
+    "\n",
+    "    The file is expected at ``data/sec_gov_company_tickers_compact.json``\n",
+    "    under the parent of the current working directory.\n",
+    "\n",
+    "    Returns:\n",
+    "        pandas.DataFrame built from the JSON object, one column per top-level key.\n",
+    "    \"\"\"\n",
+    "    project_root = os.path.dirname(os.path.abspath(\"\"))\n",
+    "    compact_path = os.path.join(\n",
+    "        project_root, 'data', 'sec_gov_company_tickers_compact.json'\n",
+    "    )\n",
+    "    with open(compact_path, 'r') as handle:\n",
+    "        payload = json.load(handle)\n",
+    "    return pandas.DataFrame(payload)\n",
+    "\n",
+    "def find_best_matching_title(input_name, top_n=3):\n",
+    "    \"\"\"Fuzzy-search company titles and return the closest matches.\n",
+    "\n",
+    "    Args:\n",
+    "        input_name: Company name (or fragment of one) to search for.\n",
+    "        top_n: Maximum number of matches to return.\n",
+    "\n",
+    "    Returns:\n",
+    "        List of ``(ticker, title, score)`` tuples produced by\n",
+    "        ``rapidfuzz.process.extract`` with the ``fuzz.WRatio`` scorer.\n",
+    "    \"\"\"\n",
+    "    df = read_ticker_data()\n",
+    "    matches = process.extract(\n",
+    "        input_name,\n",
+    "        df[\"title\"],\n",
+    "        scorer=fuzz.WRatio,\n",
+    "        limit=top_n)\n",
+    "\n",
+    "    # Per the rapidfuzz docs, for a pandas Series the third tuple element is the\n",
+    "    # index *label* of the match, so look the ticker up by label (.at) rather\n",
+    "    # than by position (.iloc); .at also avoids building a full row per match.\n",
+    "    results = [(df.at[key, \"ticker\"], title, score) for title, score, key in matches]\n",
+    "    return results\n",
+    "\n",
+    "def find_best_matching_ticker(input_name, top_n=3):\n",
+    "    \"\"\"Fuzzy-search ticker symbols and return the closest matches.\n",
+    "\n",
+    "    Args:\n",
+    "        input_name: Ticker symbol (or fragment of one); upper-cased before matching.\n",
+    "        top_n: Maximum number of matches to return.\n",
+    "\n",
+    "    Returns:\n",
+    "        List of ``(title, ticker, score)`` tuples produced by\n",
+    "        ``rapidfuzz.process.extract`` with the ``fuzz.WRatio`` scorer.\n",
+    "        Note the order: the title comes first here, unlike in\n",
+    "        ``find_best_matching_title``, which puts the ticker first.\n",
+    "    \"\"\"\n",
+    "    df = read_ticker_data()\n",
+    "    matches = process.extract(\n",
+    "        input_name.upper(),\n",
+    "        df[\"ticker\"],\n",
+    "        scorer=fuzz.WRatio,\n",
+    "        limit=top_n)\n",
+    "\n",
+    "    # Per the rapidfuzz docs, for a pandas Series the third tuple element is the\n",
+    "    # index *label* of the match, so look the title up by label (.at) rather\n",
+    "    # than by position (.iloc); .at also avoids building a full row per match.\n",
+    "    results = [(df.at[key, \"title\"], ticker, score) for ticker, score, key in matches]\n",
+    "    return results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Example usage: fuzzy lookup by (partial) company name.\n",
+    "find_best_matching_title(\"alphab\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Example usage: fuzzy lookup by ticker symbol (the input is upper-cased before matching).\n",
+    "find_best_matching_ticker(\"msft\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "finagents_py311",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}