faizhalas committed on
Commit
5779612
·
verified ·
1 Parent(s): 2d38bac

Update pages/0 FileChecker.py

Browse files
Files changed (1) hide show
  1. pages/0 FileChecker.py +59 -5
pages/0 FileChecker.py CHANGED
@@ -1,5 +1,7 @@
1
  import streamlit as st
2
  import pandas as pd
 
 
3
 
4
  #===config===
5
  st.set_page_config(
@@ -34,10 +36,23 @@ def get_ext(extype):
34
  @st.cache_data(ttl=3600)
35
  def upload(extype):
36
  keywords = pd.read_csv(uploaded_file)
 
 
 
 
 
 
 
 
37
  return keywords
38
 
39
  @st.cache_data(ttl=3600)
40
  def conv_txt(extype):
 
 
 
 
 
41
  col_dict = {'TI': 'Title',
42
  'SO': 'Source title',
43
  'DE': 'Author Keywords',
@@ -45,16 +60,48 @@ def conv_txt(extype):
45
  'AB': 'Abstract',
46
  'TC': 'Cited by',
47
  'PY': 'Year',
48
- 'ID': 'Keywords Plus'}
49
- keywords = pd.read_csv(uploaded_file, sep='\t', lineterminator='\r')
50
- keywords.rename(columns=col_dict, inplace=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  return keywords
52
 
53
  st.header('File Checker', anchor=False)
54
  st.subheader('Put your file here...', anchor=False)
55
 
56
  #===read data===
57
- uploaded_file = st.file_uploader('', type=['csv','txt'], on_change=reset_data)
58
 
59
  if uploaded_file is not None:
60
  extype = get_ext(uploaded_file)
@@ -64,8 +111,15 @@ if uploaded_file is not None:
64
  elif extype.endswith('.txt'):
65
  data = conv_txt(extype)
66
 
67
- col1, col2, col3 = st.columns(3)
 
68
 
 
 
 
 
 
 
69
  with col1:
70
  #===check keywords===
71
  keycheck = list(data.columns)
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import json
4
+ from tools import sourceformat as sf
5
 
6
  #===config===
7
  st.set_page_config(
 
36
  @st.cache_data(ttl=3600)
37
  def upload(extype):
38
  keywords = pd.read_csv(uploaded_file)
39
+ if "dimensions" in uploaded_file.name.lower():
40
+ keywords = sf.dim(keywords)
41
+ col_dict = {'MeSH terms': 'Keywords',
42
+ 'PubYear': 'Year',
43
+ 'Times cited': 'Cited by',
44
+ 'Publication Type': 'Document Type'
45
+ }
46
+ keywords.rename(columns=col_dict, inplace=True)
47
  return keywords
48
 
49
  @st.cache_data(ttl=3600)
50
  def conv_txt(extype):
51
+ if("PMID" in (uploaded_file.read()).decode()):
52
+ uploaded_file.seek(0)
53
+ papers = sf.medline(uploaded_file)
54
+ print(papers)
55
+ return papers
56
  col_dict = {'TI': 'Title',
57
  'SO': 'Source title',
58
  'DE': 'Author Keywords',
 
60
  'AB': 'Abstract',
61
  'TC': 'Cited by',
62
  'PY': 'Year',
63
+ 'ID': 'Keywords Plus',
64
+ 'rights_date_used': 'Year'}
65
+ uploaded_file.seek(0)
66
+ papers = pd.read_csv(uploaded_file, sep='\t')
67
+ if("htid" in papers.columns):
68
+ papers = sf.htrc(papers)
69
+ papers.rename(columns=col_dict, inplace=True)
70
+ print(papers)
71
+ return papers
72
+
73
+
74
+ @st.cache_data(ttl=3600)
75
+ def conv_json(extype):
76
+ col_dict={'title': 'title',
77
+ 'rights_date_used': 'Year',
78
+ 'content_provider_code':'Source title'
79
+ }
80
+
81
+ data = json.load(uploaded_file)
82
+ hathifile = data['gathers']
83
+ keywords = pd.DataFrame.from_records(hathifile)
84
+
85
+ keywords = sf.htrc(keywords)
86
+ keywords['Cited by'] = keywords.groupby(['Keywords'])['Keywords'].transform('size')
87
+ keywords.rename(columns=col_dict,inplace=True)
88
+ return keywords
89
+
90
+ @st.cache_data(ttl=3600)
91
+ def conv_pub(extype):
92
+ if (get_ext(extype)).endswith('.tar.gz'):
93
+ bytedata = extype.read()
94
+ keywords = sf.readPub(bytedata)
95
+ elif (get_ext(extype)).endswith('.xml'):
96
+ bytedata = extype.read()
97
+ keywords = sf.readxml(bytedata)
98
  return keywords
99
 
100
  st.header('File Checker', anchor=False)
101
  st.subheader('Put your file here...', anchor=False)
102
 
103
  #===read data===
104
+ uploaded_file = st.file_uploader('', type=['csv','txt','json', 'tar.gz', 'xml'], on_change=reset_data)
105
 
106
  if uploaded_file is not None:
107
  extype = get_ext(uploaded_file)
 
111
  elif extype.endswith('.txt'):
112
  data = conv_txt(extype)
113
 
114
+ elif extype.endswith('.json'):
115
+ data = conv_json(extype)
116
 
117
+ elif extype.endswith('.tar.gz') or extype.endswith('.xml'):
118
+ data = conv_pub(uploaded_file)
119
+
120
+
121
+ col1, col2, col3 = st.columns(3)
122
+
123
  with col1:
124
  #===check keywords===
125
  keycheck = list(data.columns)