File size: 11,040 Bytes
f296266
 
 
 
 
70ce674
e9ccc04
bd2b349
 
e9ccc04
f296266
 
 
 
5b10b66
f296266
34a8e5f
 
f296266
 
 
 
 
 
 
 
 
 
7649758
f296266
 
 
 
d920d55
aaddc56
86fc720
f296266
9061c75
f296266
 
d920d55
83a7585
f296266
 
a14eddb
f296266
 
70ce674
f296266
 
83a7585
f296266
 
83a7585
d920d55
f296266
74135d8
f296266
a14eddb
70ce674
eaef838
24385b9
eaef838
70ce674
 
e087be7
70ce674
e087be7
70ce674
eaef838
2fd9ed6
d920d55
 
 
b0e4995
7dc39d1
b0e4995
 
baf54ec
 
 
00fb7ba
f51ee57
 
 
00fb7ba
39b17b4
00fb7ba
 
267a29d
f51ee57
 
 
 
 
 
 
 
 
267a29d
f51ee57
 
 
 
 
 
 
 
 
 
 
 
 
 
6b17824
 
ade1543
 
d920d55
ade1543
 
 
40b6f91
 
ade1543
70ffdc3
ade1543
 
 
0a8b6ee
f497a35
ade1543
 
 
 
0a8b6ee
70ffdc3
0a8b6ee
 
 
 
ade1543
 
e9ccc04
898615c
ecfb448
e9ccc04
 
 
898615c
e9ccc04
 
898615c
e9ccc04
 
 
 
898615c
e9ccc04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4547bf9
e9ccc04
4547bf9
 
 
e9ccc04
 
 
898615c
e9ccc04
 
 
 
 
 
 
 
 
 
 
898615c
e9ccc04
 
 
 
6dedf5b
e9ccc04
 
 
 
 
 
 
898615c
e9ccc04
ecfb448
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
971375e
 
ecfb448
 
 
 
 
 
 
562c49e
6dedf5b
ecfb448
bd2b349
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
562c49e
bd2b349
898615c
 
 
 
bd2b349
91d31f2
bd2b349
91d31f2
bd2b349
898615c
bd2b349
898615c
562c49e
 
 
 
898615c
 
e9ccc04
a14eddb
9875a65
 
2fd9ed6
9875a65
2fd9ed6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
# Import libraries
import re
from collections import Counter
from itertools import combinations
from urllib.parse import quote_plus

import altair as alt
import numpy as np
import pandas as pd
import streamlit as st
from eldar import Query



# === Page configuration ===
# Browser-tab title and icon, wide layout, sidebar collapsed on first load.
st.set_page_config(
    page_title="TXTperpus",
    page_icon="https://github.com/faizhalas/Search4All/blob/main/images/logo.png?raw=true",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# Connect to the Google Sheet
# The decorator needs its leading "@": as a bare expression statement it built
# a decorator object and discarded it, so the sheet was downloaded again on
# every script run. st.cache_data is used because the cached value is a
# DataFrame, so each caller receives its own copy.
@st.cache_data(ttl=3600 * 3)
def connect_gsheet():
    """Download the journal worksheet as CSV and return it as a DataFrame.

    The sheet id and worksheet name are read from ``st.secrets``. Every
    column is loaded as ``str``, rows are returned in reverse index order,
    and missing cells are filled with the literal string ``'NaN'`` (the
    caller swaps that marker for a display placeholder).

    A ``"Full-text"`` column is added by joining the article sections that
    are present with ``" - "``. It is built *before* the ``'NaN'`` fill so
    that missing sections do not put the word "NaN" into searchable text.

    Returns:
        pandas.DataFrame: one row per sheet row, cached for three hours.
    """
    sheet_id = st.secrets.sheet_id
    sheet_name = st.secrets.sheet_journal
    url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"
    df = pd.read_csv(url, dtype=str, header=0)
    df = df.sort_index(ascending=False)

    section_cols = ["Abstract (en)", "Introduction", "Method", "Result & Discussion", "Conclusion"]
    # Join only the sections that actually have content for each row.
    full_text = df[section_cols].apply(lambda row: " - ".join(row.dropna()), axis=1)

    df = df.fillna('NaN')
    df["Full-text"] = full_text
    return df

# Load the article table.
df = connect_gsheet()

# connect_gsheet marks missing cells with the string "NaN"; replace that
# marker with the placeholder text shown to users.
df = df.replace("NaN", "Tidak tersedia")

# Page title
st.title('TXTperpus: Pencarian')

# Intro text: total number of articles loaded
st.caption(f"Eksplorasi dari **{df.shape[0]}** artikel yang dihimpun oleh TXTperpus.")
# Two-column row (6:4): search box on the left, column picker on the right.
c1, c2 = st.columns([6,4])

# The search bar. The raw string is passed later to eldar's Query as a boolean expression.
# NOTE(review): the help text contains a typo ("munggunakan" -> "menggunakan").
text_search = c1.text_input("Cari berdasarkan penulis, judul, hingga full-text. Mendukung operator boolean.", help="Silahkan munggunakan OR, AND, dan NOT dalam huruf kapital. Dapat menggunakan tanda kurung '()' dan asterisk '*' bila dibutuhkan.")

# Columns the user may search in. The last entry ("Full-text") is excluded
# from the download options further down via part_opt[:-1].
# NOTE(review): presumably every name here is a column of the sheet - confirm
# that "Result" and "Discussion" exist alongside "Result & Discussion".
part_opt = ["Title", "Abstract (en)", "Abstract (id)", "Keywords", "Introduction", "Method", "Result", "Discussion", "Result & Discussion", "Conclusion", "Full-text"]

# Column picker; title, English abstract and keywords are pre-selected.
search_opt = c2.multiselect(
     "Pilih kolom",
     part_opt,
     ["Title", "Abstract (en)", "Keywords"])

try:        
    @st.cache_data
    def search_data(text_search, search_opt):
        """Return the rows of ``df`` whose selected columns match a boolean query.

        The chosen columns are concatenated per row (space-separated) and the
        eldar ``Query`` built from ``text_search`` is evaluated,
        case-insensitively, against that text.

        The module-level ``df`` is only read, never modified: a cached
        function's side effects are skipped on cache hits, so writing helper
        columns into the shared frame left it in a state that depended on
        whether the call was served from cache.

        Args:
            text_search: Query string typed by the user (boolean operators
                supported by eldar).
            search_opt: List of column names of ``df`` to search in.

        Returns:
            pandas.DataFrame: a copy of the matching rows, including the helper
            columns ``"__combined__"`` (the searched text) and ``"df_result"``
            (the boolean match flag), as before.

        NOTE(review): the enclosing ``try``/``except IndexError`` appears to rely
        on ``Query`` raising ``IndexError`` for an empty search string; the
        statement order below is kept so that behaviour is unchanged.
        NOTE(review): ``df`` is not part of the cache key, so results cached
        here can outlive a refresh of the sheet data - confirm that is acceptable.
        """
        combined = df[search_opt].fillna("").agg(" ".join, axis=1)
        query = Query(text_search, ignore_case=True)
        matches = combined.apply(query)

        # assign() returns a new frame, leaving the shared df untouched.
        annotated = df.assign(**{"__combined__": combined, "df_result": matches})
        res_df = annotated[annotated["df_result"]].copy()

        return res_df

    key_df_j = search_data(text_search, search_opt)
    #st.write(key_df_j)
    
    # creating result
    if text_search:
        
        st.caption(f"Kami menemukan **{key_df_j.shape[0]}** artikel yang sesuai dengan apa yang dicari.")

        cl1, cl2 = st.columns([7,3])

        selection = cl1.radio("Pilih menu", ("Hasil Pencarian", "Analisis Hasil"), horizontal=True)
        
        with cl2.popover("Unduh Hasil Pencarian"):
            # One checkbox per exportable column, spread over three columns.
            # The last entry of part_opt ("Full-text") is not offered.
            options = part_opt[:-1]
            cols = st.columns(3)
            selected = []
            for i, opt in enumerate(options):
                if cols[i % 3].checkbox(opt):
                    selected.append(opt)

            st.write("Selected:", selected)

            @st.cache_data
            def convert_for_download(dfs):
                """Serialise a DataFrame to UTF-8 encoded CSV bytes (no index column)."""
                return dfs.to_csv(index=False).encode("utf-8")

            # Export the search result restricted to the ticked columns.
            # Previously the button served a frame of random numbers (leftover
            # placeholder code) and the checkboxes had no effect.
            # Ticked names that are not columns of the result are skipped so a
            # missing sheet column cannot raise KeyError here.
            export_cols = [col for col in selected if col in key_df_j.columns]
            csv = convert_for_download(key_df_j[export_cols])

            st.download_button(
                label="Download CSV",
                data=csv,
                file_name="txtperpus.csv",
                mime="text/csv",
                icon=":material/download:",
                # Nothing useful to download until at least one column is ticked.
                disabled=not export_cols,
            )
        
        st.divider()

        if selection == "Hasil Pencarian":
            # One bordered card per matching article.
            for _, row in key_df_j.reset_index().iterrows():
                with st.container(border=True):
                    # Header row: title on the left, journal reference on the right.
                    col1, col2 = st.columns([7,3])
                    col1.markdown(f"**{row['Title'].strip()}**")
                    col2.caption(f"**{row['Journal'].strip()}, {row['Volume'].strip()} ({row['Issue'].strip()}) - {row['Year'].strip()}**")

                    st.markdown(f"*{row['Authors'].strip()}*")

                    # Action row: two abstract popovers and two outbound links.
                    btn1, btn2, btn3, btn4 = st.columns(4)
                    popover1 = btn1.popover("Abstrak", icon="🔤", help="Klik untuk membaca abstrak dalam Bahasa Indonesia", use_container_width=False)
                    popover1.write(row["Abstract (id)"].strip() if row['Abstract (id)'] else "Abstrak tidak tersedia.")

                    popover2 = btn2.popover("Abstract", icon="🔤", help="Klik untuk membaca abstrak dalam Bahasa Inggris", use_container_width=False)
                    popover2.write(row['Abstract (en)'].strip() if row['Abstract (en)'] else "Abstrak tidak tersedia.")

                    btn3.link_button("Baca Artikel", f"https://faizhalas-perpusdb.hf.space/reader?art={row['ID'].strip()}", icon="📄")

                    # Percent-encode the title for the query string. Replacing
                    # only spaces with "+" let characters such as "&", "#",
                    # "?" or "+" in a title truncate or alter the Scholar query.
                    title_query = quote_plus(row['Title'].strip())
                    scholar_url = f"https://scholar.google.com/scholar?hl=id&as_sdt=0%2C5&q={title_query}"

                    btn4.link_button("Sumber", scholar_url, icon="🌐")

        else:
            st.write("Jurnal")
            jrnl_counts = key_df_j["Journal"].value_counts().sort_index()
            jrnl_counts_df = pd.DataFrame({"Journal": jrnl_counts.index, "Count": jrnl_counts.values})
            jrnl_counts_df = jrnl_counts_df.set_index("Journal")
            
            # Display bar chart
            #st.bar_chart(jrnl_counts_df)

            st.write("Tahun")
            year_counts = key_df_j["Year"].value_counts().sort_index()
            year_counts_df = pd.DataFrame({"Year": year_counts.index, "Count": year_counts.values})
            year_counts_df = year_counts_df.set_index("Year")
            
            # Display bar chart
            #st.bar_chart(year_counts_df)

            institutions = (
                key_df_j["Institution"]
                .str.split(",")             # split by comma
                .explode()                  # expand into rows
                .str.strip()                # remove extra spaces
            )

            st.write("Institusi")
            # Count occurrences
            inst_counts = institutions.value_counts().reset_index()
            inst_counts.columns = ["Institution", "Count"]

            # Horizontal bar chart with Altair
            chart_ins = (
                alt.Chart(inst_counts)
                .mark_arc()
                .encode(
                    theta="Count:Q",             # besar irisan
                    color="Institution:N",       # warna berdasarkan institusi
                    tooltip=["Institution", "Count"]  # tampilkan info saat hover
                )
            )
            
            #st.altair_chart(chart_ins, use_container_width=True)


            author = (
                key_df_j["Authors"]
                .str.split(",")             # split by comma
                .explode()                  # expand into rows
                .str.strip()                # remove extra spaces
            )

            st.write("Author")
            # Count occurrences
            aut_counts = author.value_counts().reset_index()
            aut_counts.columns = ["Authors", "Count"]

            # Horizontal bar chart with Altair
            chart_aut = (
                alt.Chart(aut_counts)
                .mark_bar()
                .encode(
                    x="Count:Q",
                    y=alt.Y("Authors:N", sort='-x')  # sort descending
                )
            )
            
            #st.altair_chart(chart_aut, use_container_width=True)




            from wordcloud import WordCloud
            import matplotlib.pyplot as plt
            
            
            keywords = (
                key_df_j["Keywords"]
                .str.split(";")
                .explode()
                .str.strip()
            )
            
            # Count frequencies
            keyword_counts = keywords.value_counts().to_dict()
            
            # --- Generate WordCloud ---
            wordcloud = WordCloud(
                width=500,
                height=500,
                background_color="white"
            ).generate_from_frequencies(keyword_counts)
            
            # Display in Streamlit
            fig, ax = plt.subplots(figsize=(10, 5))
            ax.imshow(wordcloud, interpolation="bilinear")
            ax.axis("off")
            #st.pyplot(fig)
            st.write(" ")

            
            #heatmap institusi
            pairs = []

            for row in key_df_j["Institution"]:
                cities = [c.strip() for c in row.replace(";", ",").split(",")]
                for c1, c2 in combinations(cities, 2):
                    pairs.append(tuple(sorted([c1, c2])))
            
            # Hitung frekuensi
            counter = Counter(pairs)
            
            # Ubah jadi DataFrame pasangan
            df_pairs = pd.DataFrame(
                [(c1, c2, count) for (c1, c2), count in counter.items()],
                columns=["Institusi 1", "Institusi 2", "Jumlah"]
            )
            
            # Heatmap Altair
            heatmap = (
                alt.Chart(df_pairs)
                .mark_rect()
                .encode(
                    x=alt.X("Institusi 1:N", sort=None),
                    y=alt.Y("Institusi 2:N", sort=None),
                    color=alt.Color("Jumlah:Q", scale=alt.Scale(scheme="blues")),
                    tooltip=["Institusi 1", "Institusi 2", "Jumlah"]
                )
            )
            
            #st.altair_chart(heatmap, use_container_width=True)



            # layout
            col1, col2 = st.columns(2)
            with col1.container(border=True, gap="medium", height=520):
                st.bar_chart(jrnl_counts_df, use_container_width=True, horizontal=True, width=500, height=500)
            with col2.container(border=True, gap="medium", height=520):
                st.line_chart(year_counts_df, use_container_width=True, width=500, height=500)
            with col1.container(border=True, gap="medium", height=520):
                st.altair_chart(chart_ins, use_container_width=True)
            with col2.container(border=True, gap="medium", height=520):
                st.altair_chart(chart_aut, use_container_width=True)
            with col1.container(border=True, gap="medium", height=520):
                st.pyplot(fig)
            with col2.container(border=True, gap="medium", height=520):
                st.altair_chart(heatmap, use_container_width=True)
                

            

#except Exception as e:
        #st.write(e)

except IndexError:
        st.write("Mulai dengan ketikan kata kunci pada kotak pencarian.")