Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import bm25s | |
| from operator import itemgetter | |
| import os | |
| import re | |
| import pandas as pd | |
def load_data():
    """Build and return a BM25 retriever over the documents in cleaned_list.csv.

    The CSV is read without a header row and its single column is named
    'document'. The documents are handed to the retriever as its corpus and
    are also tokenized to build the index.

    Returns:
        The indexed ``bm25s.BM25`` retriever.
    """
    frame = pd.read_csv("cleaned_list.csv", header=None)
    frame.columns = ['document']
    documents = frame['document'].to_list()

    # The corpus is attached to the retriever in addition to being indexed.
    bm25 = bm25s.BM25(corpus=documents)
    tokenized = bm25s.tokenize(documents)
    bm25.index(tokenized)
    return bm25
def extract_hscode(text):
    """Return the digits that follow the first 'hs_code:' tag in *text*.

    Args:
        text: Document text expected to contain 'hs_code: <digits>'.

    Returns:
        The matched digit run as a string (leading zeros are kept), or None
        when *text* is not a string or contains no such tag.
    """
    # Non-text values (None, NaN from a blank CSV cell) cannot hold a tag;
    # report "no code" instead of letting re.search raise TypeError.
    if not isinstance(text, str):
        return None
    match = re.search(r'hs_code:\s*(\d+)', text)
    if match is None:
        return None
    return match.group(1)
# Reference table of HS codes; this block reads and rewrites its 'hs_code' column.
df2 = pd.read_csv("hscode_main.csv")
# Store every code as text. A code that is exactly five characters long gets
# one leading '0' (presumably restoring a zero lost when the column was parsed
# as a number -- TODO confirm against the CSV). All other lengths are kept
# unchanged. No temporary helper column is added, so any existing columns of
# the table are left intact.
df2['hs_code'] = [
    '0' + code if len(code) == 5 else code
    for code in map(str, df2['hs_code'])
]
# Build the retriever only when session state does not already hold one, so an
# existing index stored under 'retriever' is reused instead of being rebuilt.
if 'retriever' not in st.session_state or st.session_state.retriever is None:
    st.session_state.retriever = load_data()
# Free-text description typed by the user.
sentence = st.text_input("please enter description:")
# Skip retrieval for empty or whitespace-only input: there is nothing to search for.
if sentence.strip():
    # Ask for the five best matches; the second return value is not used.
    results, _ = st.session_state.retriever.retrieve(bm25s.tokenize(sentence), k=5)
    # A single query was submitted, so only the first row of results is read.
    for item in results[0]:
        code = extract_hscode(item)
        if code is None:
            # The retrieved document has no 'hs_code: <digits>' tag to look up.
            continue
        filter_df = df2[df2['hs_code'] == code]
        st.write("Hscode:", code)
        if filter_df.empty:
            # Code is absent from the reference table; report it instead of
            # failing with IndexError on .iloc[0].
            st.write("answer:", "no description found for this code")
            continue
        answer = filter_df['full_description'].iloc[0]
        # str() guards against a non-text cell (e.g. a missing description).
        st.write("answer:", str(answer).lower())