File size: 3,232 Bytes
8261631
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
"""
ticker_utils.py
Ticker lookup helpers (final version); reads these CSV files from the working directory:
- NSE_Equity.csv
- BSE_Equity.csv

Provides:
- find_ticker(user_text, exchange_preference="NSE")
- suggest(text, n=5)
"""

import pandas as pd
import difflib

# Read both exchange listings; the CSV files must be present in the working
# directory (e.g. uploaded to the Colab session).
df_nse = pd.read_csv("NSE_Equity.csv")
df_bse = pd.read_csv("BSE_Equity.csv")

# Lower-case every header so the column lookups below ignore header casing.
df_nse.columns = df_nse.columns.str.lower()
df_bse.columns = df_bse.columns.str.lower()

# Validate the columns this module relies on, failing fast with a clear
# message when a CSV does not have the expected layout.
if "symbol" not in df_nse.columns:
    raise ValueError("NSE CSV missing 'Symbol' column")
nse_symbol = "symbol"

if "name of company" not in df_nse.columns:
    raise ValueError("NSE CSV missing 'Name of Company' column")
nse_name = "name of company"

if "security code" not in df_bse.columns:
    raise ValueError("BSE CSV missing 'Security Code' column")
bse_symbol = "security code"

if "issuer name" not in df_bse.columns:
    raise ValueError("BSE CSV missing 'Issuer Name' column")
bse_name = "issuer name"

# NSE: normalized (lower-cased, stripped) lookup keys and the Yahoo-style ticker.
df_nse["clean_name"] = df_nse[nse_name].astype(str).str.lower().str.strip()
df_nse["clean_symbol"] = df_nse[nse_symbol].astype(str).str.lower().str.strip()
df_nse["ticker"] = df_nse[nse_symbol].astype(str).str.upper() + ".NS"

# BSE: same derived columns, with the ".BO" suffix for the ticker.
df_bse["clean_name"] = df_bse[bse_name].astype(str).str.lower().str.strip()
df_bse["clean_symbol"] = df_bse[bse_symbol].astype(str).str.lower().str.strip()
df_bse["ticker"] = df_bse[bse_symbol].astype(str).str.upper() + ".BO"

def suggest(text, n=5):
    """Return up to ``n`` known company names that closely resemble ``text``.

    The query is lower-cased and stripped before comparison, and candidates
    are the cleaned company names from both the NSE and BSE listings.
    Matches scoring below difflib's 0.6 similarity cutoff are dropped.
    """
    query = text.lower().strip()
    # NSE names first, then BSE names, in a single candidate pool.
    candidates = [*df_nse["clean_name"], *df_bse["clean_name"]]
    return difflib.get_close_matches(query, candidates, n=n, cutoff=0.6)

def find_ticker(text: str, exchange_preference: str = "NSE") -> str:
    """
    Resolve user text to a Yahoo-style ticker string.

    Lookup order:
      1. Exact company-name match, then exact symbol match, on the preferred
         exchange; if neither hits, the same two checks on the other exchange.
      2. Closest fuzzy company-name match (via ``suggest``); when that name
         is listed on both exchanges, the preferred exchange wins.
      3. Fallback: treat the text as a raw symbol and append the preferred
         exchange's suffix. This result is not validated against any listing.

    Args:
        text: Company name or symbol typed by the user. Matching is
            case-insensitive and ignores surrounding whitespace.
        exchange_preference: "NSE" or "BSE" (case-insensitive). Any other
            value, including None or "", is treated as "NSE".

    Returns:
        The ticker of the first match, or ``text.upper()`` plus ".NS"
        (".BO" when BSE is preferred) if nothing matched.
    """
    text = text.lower().strip()

    # Anything other than an explicit "BSE" keeps the documented NSE default.
    prefer_bse = bool(exchange_preference) and exchange_preference.upper() == "BSE"
    if prefer_bse:
        frames, fallback_suffix = (df_bse, df_nse), ".BO"
    else:
        frames, fallback_suffix = (df_nse, df_bse), ".NS"

    # 1) exact match: preferred exchange first, then the other one, so that a
    #    valid symbol from the non-preferred exchange is not turned into a
    #    bogus fallback ticker.
    for frame in frames:
        for column in ("clean_name", "clean_symbol"):
            hits = frame.loc[frame[column] == text, "ticker"]
            if not hits.empty:
                return hits.iloc[0]

    # 2) fuzzy match on company names; honor the preference when the matched
    #    name exists on both exchanges.
    close = suggest(text, n=1)
    if close:
        for frame in frames:
            hits = frame.loc[frame["clean_name"] == close[0], "ticker"]
            if not hits.empty:
                return hits.iloc[0]

    # 3) fallback: assume the user provided a symbol for the preferred exchange.
    return text.upper() + fallback_suffix