Spaces:

faizhalas
/

coconut

Running

App Files Community

faizhalas committed on Sep 30, 2025

Commit

5779612

verified ·

1 Parent(s): 2d38bac

Update pages/0 FileChecker.py

Browse files

Files changed (1) hide show

pages/0 FileChecker.py +59 -5

pages/0 FileChecker.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import streamlit as st
 import pandas as pd
 #===config===
 st.set_page_config(
@@ -34,10 +36,23 @@ def get_ext(extype):
 @st.cache_data(ttl=3600)
 def upload(extype):
     keywords = pd.read_csv(uploaded_file)
     return keywords
 @st.cache_data(ttl=3600)
 def conv_txt(extype):
     col_dict = {'TI': 'Title',
             'SO': 'Source title',
             'DE': 'Author Keywords',
@@ -45,16 +60,48 @@ def conv_txt(extype):
             'AB': 'Abstract',
             'TC': 'Cited by',
             'PY': 'Year',
-            'ID': 'Keywords Plus'}
-    keywords = pd.read_csv(uploaded_file, sep='\t', lineterminator='\r')
-    keywords.rename(columns=col_dict, inplace=True)
     return keywords
 st.header('File Checker', anchor=False)
 st.subheader('Put your file here...', anchor=False)
 #===read data===
-uploaded_file = st.file_uploader('', type=['csv','txt'], on_change=reset_data)
 if uploaded_file is not None:
     extype = get_ext(uploaded_file)
@@ -64,8 +111,15 @@ if uploaded_file is not None:
     elif extype.endswith('.txt'):
         data = conv_txt(extype)
-    col1, col2, col3 = st.columns(3)
     with col1:
         #===check keywords===
         keycheck = list(data.columns)

 import streamlit as st
 import pandas as pd
+import json
+from tools import sourceformat as sf
 #===config===
 st.set_page_config(
 @st.cache_data(ttl=3600)
 def upload(extype):
     keywords = pd.read_csv(uploaded_file)
+    if "dimensions" in uploaded_file.name.lower():
+        keywords = sf.dim(keywords)
+        col_dict = {'MeSH terms': 'Keywords',
+        'PubYear': 'Year',
+        'Times cited': 'Cited by',
+        'Publication Type': 'Document Type'
+        }
+        keywords.rename(columns=col_dict, inplace=True)
     return keywords
 @st.cache_data(ttl=3600)
 def conv_txt(extype):
+    if("PMID" in (uploaded_file.read()).decode()):
+        uploaded_file.seek(0)
+        papers = sf.medline(uploaded_file)
+        print(papers)
+        return papers
     col_dict = {'TI': 'Title',
             'SO': 'Source title',
             'DE': 'Author Keywords',
             'AB': 'Abstract',
             'TC': 'Cited by',
             'PY': 'Year',
+            'ID': 'Keywords Plus',
+            'rights_date_used': 'Year'}
+    uploaded_file.seek(0)
+    papers = pd.read_csv(uploaded_file, sep='\t')
+    if("htid" in papers.columns):
+        papers = sf.htrc(papers)
+    papers.rename(columns=col_dict, inplace=True)
+    print(papers)
+    return papers
+@st.cache_data(ttl=3600)
+def conv_json(extype):
+    col_dict={'title': 'title',
+    'rights_date_used': 'Year',
+    'content_provider_code':'Source title'
+    }
+    data = json.load(uploaded_file)
+    hathifile = data['gathers']
+    keywords = pd.DataFrame.from_records(hathifile)
+    keywords = sf.htrc(keywords)
+    keywords['Cited by'] = keywords.groupby(['Keywords'])['Keywords'].transform('size')
+    keywords.rename(columns=col_dict,inplace=True)
+    return keywords
+@st.cache_data(ttl=3600)
+def conv_pub(extype):
+    if (get_ext(extype)).endswith('.tar.gz'):
+        bytedata = extype.read()
+        keywords = sf.readPub(bytedata)
+    elif (get_ext(extype)).endswith('.xml'):
+        bytedata = extype.read()
+        keywords = sf.readxml(bytedata)
     return keywords
 st.header('File Checker', anchor=False)
 st.subheader('Put your file here...', anchor=False)
 #===read data===
+uploaded_file = st.file_uploader('', type=['csv','txt','json', 'tar.gz', 'xml'], on_change=reset_data)
 if uploaded_file is not None:
     extype = get_ext(uploaded_file)
     elif extype.endswith('.txt'):
         data = conv_txt(extype)
+    elif extype.endswith('.json'):
+        data = conv_json(extype)
+    elif extype.endswith('.tar.gz') or extype.endswith('.xml'):
+        data = conv_pub(uploaded_file)
+    col1, col2, col3 = st.columns(3)
     with col1:
         #===check keywords===
         keycheck = list(data.columns)