OnurKerimoglu committed on
Commit
dfed37d
·
1 Parent(s): c20857a

introduced notebooks/ticker_lists_search.ipynb

Browse files
Files changed (1) hide show
  1. notebooks/ticker_lists_search.ipynb +133 -0
notebooks/ticker_lists_search.ipynb ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas\n",
10
+ "import json\n",
11
+ "import os\n"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": null,
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "# initial data prep\n",
21
+ "rootdir = os.path.dirname(os.path.abspath(\"\"))\n",
22
+ "fname_raw = os.path.join(rootdir, 'data_raw', 'sec_gov_company_tickers.json')\n",
23
+ "\n",
24
+ "with open(fname_raw, 'r') as f:\n",
25
+ " data = json.load(f)\n",
26
+ "\n",
27
+ "titles = [None]*len(data)\n",
28
+ "tickers = [None]*len(data)\n",
29
+ "for k, v in data.items():\n",
30
+ " i = int(k)\n",
31
+ " titles[i] = v['title']\n",
32
+ " tickers[i] = v['ticker']\n",
33
+ "data_compact = {'ticker': tickers, 'title': titles}\n",
34
+ "\n",
35
+ "fname_compact = os.path.join(rootdir, 'data', 'sec_gov_company_tickers_compact.json')\n",
36
+ "with open(fname_compact, 'w') as f:\n",
37
+ " json.dump(data_compact, f)\n"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": null,
43
+ "metadata": {},
44
+ "outputs": [],
45
+ "source": [
46
+ "with open(fname_compact, 'r') as f:\n",
47
+ " data = json.load(f)\n",
48
+ " \n",
49
+ "df = pandas.DataFrame.from_dict(data, orient='columns')\n",
50
+ "df.head()"
51
+ ]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": null,
56
+ "metadata": {},
57
+ "outputs": [],
58
+ "source": [
59
+ "from rapidfuzz import process, fuzz\n",
60
+ "\n",
61
+ "def read_ticker_data():\n",
62
+ " rootdir = os.path.dirname(os.path.abspath(\"\"))\n",
63
+ " fname_compact = os.path.join(rootdir, 'data', 'sec_gov_company_tickers_compact.json')\n",
64
+ " with open(fname_compact, 'r') as f:\n",
65
+ " data = json.load(f)\n",
66
+ " df = pandas.DataFrame.from_dict(data, orient='columns')\n",
67
+ " return df\n",
68
+ "\n",
69
+ "def find_best_matching_title(input_name, top_n=3):\n",
70
+ " df = read_ticker_data()\n",
71
+ " matches = process.extract(\n",
72
+ " input_name,\n",
73
+ " df[\"title\"],\n",
74
+ " scorer=fuzz.WRatio,\n",
75
+ " limit=top_n)\n",
76
+ "\n",
77
+ " results = [(df.iloc[idx][\"ticker\"], title, score) for title, score, idx in matches]\n",
78
+ " return results\n",
79
+ "\n",
80
+ "def find_best_matching_ticker(input_name, top_n=3):\n",
81
+ " df = read_ticker_data()\n",
82
+ " matches = process.extract(\n",
83
+ " input_name.upper(),\n",
84
+ " df[\"ticker\"],\n",
85
+ " scorer=fuzz.WRatio,\n",
86
+ " limit=top_n)\n",
87
+ "\n",
88
+ " results = [(df.iloc[idx][\"title\"], ticker, score) for ticker, score, idx in matches]\n",
89
+ " return results"
90
+ ]
91
+ },
92
+ {
93
+ "cell_type": "code",
94
+ "execution_count": null,
95
+ "metadata": {},
96
+ "outputs": [],
97
+ "source": [
98
+ "# Example Usage\n",
99
+ "print(find_best_matching_title(\"alphab\"))"
100
+ ]
101
+ },
102
+ {
103
+ "cell_type": "code",
104
+ "execution_count": null,
105
+ "metadata": {},
106
+ "outputs": [],
107
+ "source": [
108
+ "print(find_best_matching_ticker(\"msft\"))"
109
+ ]
110
+ }
111
+ ],
112
+ "metadata": {
113
+ "kernelspec": {
114
+ "display_name": "finagents_py311",
115
+ "language": "python",
116
+ "name": "python3"
117
+ },
118
+ "language_info": {
119
+ "codemirror_mode": {
120
+ "name": "ipython",
121
+ "version": 3
122
+ },
123
+ "file_extension": ".py",
124
+ "mimetype": "text/x-python",
125
+ "name": "python",
126
+ "nbconvert_exporter": "python",
127
+ "pygments_lexer": "ipython3",
128
+ "version": "3.11.1"
129
+ }
130
+ },
131
+ "nbformat": 4,
132
+ "nbformat_minor": 2
133
+ }