Spaces:
Running
Running
File size: 9,637 Bytes
2b45b17 5779612 2b45b17 5779612 3eb462c 2b45b17 5779612 2b45b17 5779612 2b45b17 7221896 2b45b17 25789b6 2b45b17 5779612 25789b6 5779612 7221896 5779612 2b45b17 958ac64 2b45b17 958ac64 2b45b17 bb3914c 2b45b17 958ac64 2b45b17 bb3914c 2b45b17 958ac64 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 |
import streamlit as st
import pandas as pd
import json
from tools import sourceformat as sf
#===config===
st.set_page_config(
    page_title="Coconut",
    page_icon="π₯₯",
    layout="wide",
    initial_sidebar_state="collapsed"
)

# CSS injected to hide Streamlit's default chrome: the hamburger menu,
# the footer, and the collapsed-sidebar toggle.
_HIDE_CHROME_CSS = """
<style>
#MainMenu
{visibility: hidden;}
footer {visibility: hidden;}
[data-testid="collapsedControl"] {display: none}
</style>
"""
st.markdown(_HIDE_CHROME_CSS, unsafe_allow_html=True)
st.page_link("https://www.coconut-libtool.com/the-app", label="Go to app", icon="π₯₯")
def reset_data():
    # on_change hook for st.file_uploader below: drop every cached conversion
    # so a newly uploaded file is re-read from scratch.
    st.cache_data.clear()
#===check filetype===
@st.cache_data(ttl=3600)
def get_ext(extype):
    """Return the uploaded file's name, used to dispatch on its extension.

    Bug fix: the parameter was previously ignored in favour of the module-level
    `uploaded_file`, which also made the cache key meaningless. Both call sites
    pass the UploadedFile object, so reading the argument yields the same
    result while keeping the function self-contained.
    """
    # An UploadedFile exposes .name; fall back to the value itself if a plain
    # string is ever passed.
    return getattr(extype, "name", extype)
#===upload===
@st.cache_data(ttl=3600)
def upload(extype):
    """Load the uploaded CSV and map known export formats onto the app's
    canonical column names.

    Reads the module-level `uploaded_file`; `extype` (the filename) serves
    as the cache key. Dimensions exports are recognized by their filename,
    OpenAlex exports by their 'ids.openalex' column.
    """
    keywords = pd.read_csv(uploaded_file)
    if "dimensions" in uploaded_file.name.lower():
        keywords = sf.dim(keywords)
        keywords.rename(columns={'MeSH terms': 'Keywords',
                                 'PubYear': 'Year',
                                 'Times cited': 'Cited by',
                                 'Publication Type': 'Document Type'},
                        inplace=True)
    elif "ids.openalex" in keywords.columns:
        keywords.rename(columns={'keywords.display_name': 'Keywords',
                                 'publication_year': 'Year',
                                 'cited_by_count': 'Cited by',
                                 'type': 'Document Type',
                                 'primary_location.source.display_name': 'Source title'},
                        inplace=True)
    return keywords
@st.cache_data(ttl=3600)
def conv_txt(extype):
    """Read the uploaded .txt export into a DataFrame.

    PubMed Medline exports (detected by a 'PMID' marker anywhere in the raw
    text) are parsed with sf.medline(). Anything else is read as a
    tab-separated table, converted from HathiTrust format when an 'htid'
    column is present, and renamed to the app's canonical column names.

    Fix: removed leftover debug `print(papers)` statements.
    """
    # Sniff the raw bytes for the Medline 'PMID' field tag.
    if "PMID" in uploaded_file.read().decode():
        uploaded_file.seek(0)  # rewind after the sniffing read
        return sf.medline(uploaded_file)
    col_dict = {'TI': 'Title',
                'SO': 'Source title',
                'DE': 'Author Keywords',
                'DT': 'Document Type',
                'AB': 'Abstract',
                'TC': 'Cited by',
                'PY': 'Year',
                'ID': 'Keywords Plus',
                'rights_date_used': 'Year'}
    uploaded_file.seek(0)  # rewind after the sniffing read
    papers = pd.read_csv(uploaded_file, sep='\t')
    # HathiTrust exports carry an 'htid' column and need extra processing.
    if "htid" in papers.columns:
        papers = sf.htrc(papers)
    papers.rename(columns=col_dict, inplace=True)
    return papers
@st.cache_data(ttl=3600)
def conv_json(extype):
    """Load a HathiTrust JSON workset from `uploaded_file` into a DataFrame."""
    records = json.load(uploaded_file)['gathers']
    frame = pd.DataFrame.from_records(records)
    frame = sf.htrc(frame)
    # Use the per-keyword occurrence count as a citation-count stand-in.
    frame['Cited by'] = frame.groupby(['Keywords'])['Keywords'].transform('size')
    rename_map = {'title': 'title',
                  'rights_date_used': 'Year',
                  'content_provider_code': 'Source title'}
    frame.rename(columns=rename_map, inplace=True)
    return frame
@st.cache_data(ttl=3600)
def conv_pub(extype):
    """Convert an uploaded PubMed bulk archive (.tar.gz) or XML file.

    `extype` is the UploadedFile itself (see the call site); its name decides
    which sourceformat reader handles the raw bytes.

    Raises:
        ValueError: on an unsupported extension. Previously this case fell
        through and surfaced as an obscure UnboundLocalError on `keywords`.
    """
    filename = get_ext(extype)
    if filename.endswith('.tar.gz'):
        keywords = sf.readPub(extype.read())
    elif filename.endswith('.xml'):
        keywords = sf.readxml(extype.read())
    else:
        raise ValueError(f"Unsupported file type: {filename}")
    return keywords
@st.cache_data(ttl=3600)
def readxls(file):
    """Read an uploaded Excel (.xls/.xlsx) export into a DataFrame.

    Bug fix: the `file` parameter was previously ignored in favour of the
    module-level `uploaded_file`; the caller passes that same object, so
    reading the argument is behavior-preserving and self-contained.
    Dimensions exports (first column titled 'About the data') are converted
    with sf.dim() and renamed to the canonical column names.
    """
    papers = pd.read_excel(file, sheet_name=0, engine='openpyxl')
    if "About the data" in papers.columns[0]:
        papers = sf.dim(papers)
        col_dict = {'MeSH terms': 'Keywords',
                    'PubYear': 'Year',
                    'Times cited': 'Cited by',
                    'Publication Type': 'Document Type'}
        # Renaming is a no-op when none of these columns exist.
        papers.rename(columns=col_dict, inplace=True)
    return papers
st.header('File Checker', anchor=False)
st.subheader('Put your file here...', anchor=False)

#===read data===
uploaded_file = st.file_uploader('', type=['csv', 'txt', 'json', 'tar.gz', 'xml', 'xls', 'xlsx'], on_change=reset_data)

if uploaded_file is not None:
    extype = get_ext(uploaded_file)
    # Dispatch on the filename extension; the uploader's `type` list
    # guarantees exactly one branch matches.
    if extype.endswith('.csv'):
        data = upload(extype)
    elif extype.endswith('.txt'):
        data = conv_txt(extype)
    elif extype.endswith('.json'):
        data = conv_json(extype)
    elif extype.endswith('.tar.gz') or extype.endswith('.xml'):
        data = conv_pub(uploaded_file)
    elif extype.endswith(('.xls', '.xlsx')):
        # Bug fix: the result was previously bound to `papers`, leaving
        # `data` undefined for Excel uploads and crashing every check below.
        data = readxls(uploaded_file)
col1, col2, col3 = st.columns(3)

with col1:
    # --- Keyword Stem: needs a column whose name contains 'Keyword' ---
    keycheck = [c for c in data.columns if 'Keyword' in c]
    container1 = st.container(border=True)
    if keycheck:
        container1.subheader('βοΈ Keyword Stem', divider='blue', anchor=False)
        container1.write('Congratulations! You can use Keywords Stem')
    else:
        container1.subheader('β Keyword Stem', divider='red', anchor=False)
        container1.write("Unfortunately, you don't have a column containing keywords in your data. Please check again. If you want to use it in another column, please rename it to 'Keywords'.")

    # --- Sunburst: needs Document Type, Source title, Cited by and Year ---
    # Normalize OpenAlex-style headers first so the check below sees them.
    if 'Publication Year' in data.columns:
        data.rename(columns={'Publication Year': 'Year', 'Citing Works Count': 'Cited by',
                             'Publication Type': 'Document Type', 'Source Title': 'Source title'}, inplace=True)
    miss_col = [c for c in ['Document Type', 'Source title', 'Cited by', 'Year'] if c not in data.columns]
    container2 = st.container(border=True)
    if miss_col:
        container2.subheader('β Sunburst', divider='red', anchor=False)
        miss_col_str = ', '.join(miss_col)
        container2.write(f"Unfortunately, you don't have: {miss_col_str}. Please check again.")
    else:
        container2.subheader('βοΈ Sunburst', divider='blue', anchor=False)
        container2.write('Congratulations! You can use Sunburst')

    # --- Topic Modeling: needs an object (text) column and >= 2 rows ---
    coldf = sorted(data.select_dtypes(include=['object']).columns.tolist())
    container3 = st.container(border=True)
    if coldf and data.shape[0] >= 2:
        container3.subheader('βοΈ Topic Modeling', divider='blue', anchor=False)
        container3.write('Congratulations! You can use Topic Modeling')
    else:
        container3.subheader('β Topic Modeling', divider='red', anchor=False)
        container3.write("Unfortunately, you don't have a column containing object in your data. Please check again.")
with col2:
    # --- Burst Detection: needs an object column plus a 'Year' column ---
    container4 = st.container(border=True)
    if coldf and 'Year' in data.columns:
        container4.subheader('βοΈ Burst Detection', divider='blue', anchor=False)
        container4.write('Congratulations! You can use Burst Detection')
    else:
        container4.subheader('β Burst Detection', divider='red', anchor=False)
        container4.write("Unfortunately, you don't have a column containing object in your data or a 'Year' column. Please check again.")

    # --- Bidirected Network: needs a keyword column (computed under col1) ---
    container5 = st.container(border=True)
    if keycheck:
        container5.subheader('βοΈ Bidirected Network', divider='blue', anchor=False)
        container5.write('Congratulations! You can use Bidirected Network')
    else:
        container5.subheader('β Bidirected Network', divider='red', anchor=False)
        container5.write("Unfortunately, you don't have a column containing keywords in your data. Please check again. If you want to use it in another column, please rename it to 'Keywords'.")

    # --- Scattertext: needs an object column and >= 2 rows ---
    container6 = st.container(border=True)
    if coldf and data.shape[0] >= 2:
        container6.subheader('βοΈ Scattertext', divider='blue', anchor=False)
        container6.write('Congratulations! You can use Scattertext')
    else:
        container6.subheader('β Scattertext', divider='red', anchor=False)
        container6.write("Unfortunately, you don't have a column containing object in your data. Please check again.")
with col3:
    # --- Shifterator: needs an object column and >= 2 rows ---
    container7 = st.container(border=True)
    if coldf and data.shape[0] >= 2:
        container7.subheader('βοΈ Shifterator', divider='blue', anchor=False)
        container7.write('Congratulations! You can use Shifterator')
    else:
        container7.subheader('β Shifterator', divider='red', anchor=False)
        container7.write("Unfortunately, you don't have a column containing object in your data. Please check again.")

    # --- Sentiment Analysis: same requirement ---
    container8 = st.container(border=True)
    if coldf and data.shape[0] >= 2:
        container8.subheader('βοΈ Sentiment Analysis', divider='blue', anchor=False)
        container8.write('Congratulations! You can use Sentiment Analysis')
    else:
        container8.subheader('β Sentiment Analysis', divider='red', anchor=False)
        container8.write("Unfortunately, you don't have a column containing object in your data. Please check again.")

    # --- Wordcloud: same requirement ---
    container9 = st.container(border=True)
    if coldf and data.shape[0] >= 2:
        container9.subheader('βοΈ Wordcloud', divider='blue', anchor=False)
        container9.write('Congratulations! You can use Wordcloud')
    else:
        container9.subheader('β Wordcloud', divider='red', anchor=False)
        container9.write("Unfortunately, you don't have a column containing object in your data. Please check again.")
|