|
|
|
|
|
import re
from collections import Counter
from itertools import combinations
from urllib.parse import quote_plus

import altair as alt
import numpy as np
import pandas as pd
import streamlit as st
from eldar import Query
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Page-level settings: browser-tab title and icon, a full-width layout, and a
# sidebar that starts collapsed.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="collapsed",
    page_title="TXTperpus",
    page_icon="https://github.com/faizhalas/Search4All/blob/main/images/logo.png?raw=true",
)
|
|
|
|
|
|
|
|
# The decorator must be applied with "@"; as a bare expression statement the
# cache wrapper was created and thrown away, so the sheet was downloaded again
# on every script rerun.
@st.cache_resource(ttl=3600 * 3)
def connect_gsheet():
    """Download the journal sheet as CSV and return it as a DataFrame.

    The sheet id and sheet name are read from ``st.secrets``. Every column is
    loaded as ``str``; the rows are returned in reverse index order and missing
    cells are filled with the literal string ``'NaN'``. A ``"Full-text"``
    column is added by joining the abstract and body sections with ``" - "``.

    The result is cached for three hours. ``st.cache_resource`` hands the same
    object to every caller, so callers should not modify it in place.

    Returns:
        pandas.DataFrame: one row per article, all values as strings.
    """
    sheet_id = st.secrets.sheet_id
    sheet_name = st.secrets.sheet_journal
    url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"

    df = pd.read_csv(url, dtype=str, header=0)
    df = df.sort_index(ascending=False).fillna('NaN')

    # Sections that together make up the searchable full text of an article.
    full_text_parts = ["Abstract (en)", "Introduction", "Method", "Result & Discussion", "Conclusion"]
    df["Full-text"] = df[full_text_parts].agg(" - ".join, axis=1)
    return df
|
|
|
|
|
# Load the article table and replace the "NaN" placeholder strings with the
# label that is shown to users.
df = connect_gsheet().replace("NaN", "Tidak tersedia")

st.title('TXTperpus: Pencarian')
st.caption(f"Eksplorasi dari **{len(df)}** artikel yang dihimpun oleh TXTperpus.")

# Search row: query box in the wider left column, column picker on the right.
c1, c2 = st.columns([6, 4])

text_search = c1.text_input(
    "Cari berdasarkan penulis, judul, hingga full-text. Mendukung operator boolean.",
    help="Silahkan munggunakan OR, AND, dan NOT dalam huruf kapital. Dapat menggunakan tanda kurung '()' dan asterisk '*' bila dibutuhkan.",
)

# Columns the user may search in; the last entry is the joined full text.
part_opt = [
    "Title",
    "Abstract (en)",
    "Abstract (id)",
    "Keywords",
    "Introduction",
    "Method",
    "Result",
    "Discussion",
    "Result & Discussion",
    "Conclusion",
    "Full-text",
]

search_opt = c2.multiselect(
    label="Pilih kolom",
    options=part_opt,
    default=["Title", "Abstract (en)", "Keywords"],
)
|
|
|
|
|
# Same three-hour lifetime that is declared for the sheet loader, so cached
# search results do not outlive the data they were computed from.
@st.cache_data(ttl=3600 * 3)
def search_data(text_search, search_opt):
    """Return the rows of ``df`` whose selected columns match a boolean query.

    Args:
        text_search: query string handed to ``eldar.Query`` (case-insensitive).
        search_opt: list of column names whose text is searched.

    Returns:
        pandas.DataFrame: a copy of the matching rows of the module-level
        ``df``. ``df`` itself is left untouched (no helper columns are added).

    Raises:
        IndexError: propagated from ``eldar.Query`` when it fails on the query
            string; the caller turns this into a hint for the user.
    """
    if search_opt:
        combined = df[list(search_opt)].fillna("").agg(" ".join, axis=1)
    else:
        # No column selected: there is no text to match against.
        combined = pd.Series("", index=df.index)

    query = Query(text_search, ignore_case=True)
    mask = combined.apply(query).astype(bool)
    return df[mask].copy()


@st.cache_data
def convert_for_download(dfs):
    """Serialise a DataFrame to UTF-8 encoded CSV bytes without the index."""
    return dfs.to_csv(index=False).encode("utf-8")


def _render_download(slot, results):
    """Draw the download popover in *slot* and offer *results* as CSV.

    The user ticks the columns to export. When nothing is ticked, every
    selectable column is exported so the button always yields a usable file.
    """
    with slot.popover("Unduh Hasil Pencarian"):
        # Every searchable column except the synthetic "Full-text" one.
        options = part_opt[:-1]
        cols = st.columns(3)
        selected = []
        for i, opt in enumerate(options):
            if cols[i % 3].checkbox(opt):
                selected.append(opt)

        st.write("Selected:", selected)

        # Export the actual search results (restricted to columns that exist
        # in the table) instead of placeholder random numbers.
        export_cols = [name for name in (selected or options) if name in results.columns]
        csv = convert_for_download(results[export_cols])

        st.download_button(
            label="Download CSV",
            data=csv,
            file_name="txtperpus.csv",
            mime="text/csv",
            icon=":material/download:",
        )


def _render_results(results):
    """Render one bordered card per matching article."""
    for _, row in results.iterrows():
        with st.container(border=True):
            col1, col2 = st.columns([7, 3])
            col1.markdown(f"**{row['Title'].strip()}**")
            col2.caption(f"**{row['Journal'].strip()}, {row['Volume'].strip()} ({row['Issue'].strip()}) - {row['Year'].strip()}**")

            st.markdown(f"*{row['Authors'].strip()}*")

            btn1, btn2, btn3, btn4 = st.columns(4)
            popover1 = btn1.popover("Abstrak", icon="🔤", help="Klik untuk membaca abstrak dalam Bahasa Indonesia", use_container_width=False)
            popover1.write(row["Abstract (id)"].strip() if row['Abstract (id)'] else "Abstrak tidak tersedia.")

            popover2 = btn2.popover("Abstract", icon="🔤", help="Klik untuk membaca abstrak dalam Bahasa Inggris", use_container_width=False)
            popover2.write(row['Abstract (en)'].strip() if row['Abstract (en)'] else "Abstrak tidak tersedia.")

            btn3.link_button("Baca Artikel", f"https://faizhalas-perpusdb.hf.space/reader?art={row['ID'].strip()}", icon="📄")

            # Percent-encode the title so characters such as "&", "#" or "?"
            # cannot break the query string; spaces still become "+".
            title_query = quote_plus(row['Title'].strip())
            scholar_url = f"https://scholar.google.com/scholar?hl=id&as_sdt=0%2C5&q={title_query}"
            btn4.link_button("Sumber", scholar_url, icon="🌐")


def _count_frame(counts, label):
    """Turn a ``value_counts`` Series into a two-column frame (*label*, "Count")."""
    return pd.DataFrame({label: counts.index, "Count": counts.values})


def _split_and_count(column, separator):
    """Split each cell on *separator*, trim the pieces and count occurrences."""
    return column.str.split(separator).explode().str.strip().value_counts()


def _render_analysis(results):
    """Render the charts that summarise *results*.

    Shows articles per journal and per year, institution and author
    frequencies, a keyword word cloud and an institution co-occurrence heatmap.
    """
    if results.empty:
        # WordCloud raises ValueError when it is given no words at all.
        st.info("Tidak ada artikel yang dapat dianalisis.")
        return

    # The plotting stack is imported only when the analysis view is rendered.
    from wordcloud import WordCloud
    import matplotlib.pyplot as plt

    for label in ("Jurnal", "Tahun", "Institusi", "Author", " "):
        st.write(label)

    jrnl_counts_df = _count_frame(
        results["Journal"].value_counts().sort_index(), "Journal"
    ).set_index("Journal")
    year_counts_df = _count_frame(
        results["Year"].value_counts().sort_index(), "Year"
    ).set_index("Year")

    inst_counts = _count_frame(_split_and_count(results["Institution"], ","), "Institution")
    chart_ins = (
        alt.Chart(inst_counts)
        .mark_arc()
        .encode(
            theta="Count:Q",
            color="Institution:N",
            tooltip=["Institution", "Count"],
        )
    )

    aut_counts = _count_frame(_split_and_count(results["Authors"], ","), "Authors")
    chart_aut = (
        alt.Chart(aut_counts)
        .mark_bar()
        .encode(
            x="Count:Q",
            y=alt.Y("Authors:N", sort='-x'),
        )
    )

    keyword_counts = _split_and_count(results["Keywords"], ";").to_dict()
    wordcloud = WordCloud(
        width=500,
        height=500,
        background_color="white",
    ).generate_from_frequencies(keyword_counts)
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")

    # Count how often two institutions appear on the same article. Each pair
    # is sorted so (A, B) and (B, A) land in the same bucket.
    pair_counts = Counter()
    for affiliations in results["Institution"]:
        names = [name.strip() for name in affiliations.replace(";", ",").split(",")]
        pair_counts.update(tuple(sorted(pair)) for pair in combinations(names, 2))

    df_pairs = pd.DataFrame(
        [(first, second, count) for (first, second), count in pair_counts.items()],
        columns=["Institusi 1", "Institusi 2", "Jumlah"],
    )
    heatmap = (
        alt.Chart(df_pairs)
        .mark_rect()
        .encode(
            x=alt.X("Institusi 1:N", sort=None),
            y=alt.Y("Institusi 2:N", sort=None),
            color=alt.Color("Jumlah:Q", scale=alt.Scale(scheme="blues")),
            tooltip=["Institusi 1", "Institusi 2", "Jumlah"],
        )
    )

    col1, col2 = st.columns(2)
    with col1.container(border=True, gap="medium", height=520):
        st.bar_chart(jrnl_counts_df, use_container_width=True, horizontal=True, width=500, height=500)
    with col2.container(border=True, gap="medium", height=520):
        st.line_chart(year_counts_df, use_container_width=True, width=500, height=500)
    with col1.container(border=True, gap="medium", height=520):
        st.altair_chart(chart_ins, use_container_width=True)
    with col2.container(border=True, gap="medium", height=520):
        st.altair_chart(chart_aut, use_container_width=True)
    with col1.container(border=True, gap="medium", height=520):
        st.pyplot(fig)
        # pyplot keeps every figure alive until it is closed; without this the
        # figures pile up across script reruns.
        plt.close(fig)
    with col2.container(border=True, gap="medium", height=520):
        st.altair_chart(heatmap, use_container_width=True)


# Run the search only when there is a query. The try body is limited to the
# search call, so errors raised while rendering are no longer mistaken for an
# empty query.
key_df_j = None
if text_search:
    try:
        key_df_j = search_data(text_search, search_opt)
    except IndexError:
        # NOTE(review): presumably raised by eldar for a query it cannot
        # parse; shown to the user as the same hint as an empty search box.
        key_df_j = None

if key_df_j is None:
    st.write("Mulai dengan ketikan kata kunci pada kotak pencarian.")
else:
    st.caption(f"Kami menemukan **{key_df_j.shape[0]}** artikel yang sesuai dengan apa yang dicari.")

    cl1, cl2 = st.columns([7, 3])
    selection = cl1.radio("Pilih menu", ("Hasil Pencarian", "Analisis Hasil"), horizontal=True)
    _render_download(cl2, key_df_j)

    st.divider()

    if selection == "Hasil Pencarian":
        _render_results(key_df_j)
    else:
        _render_analysis(key_df_j)