# perpusdb / Home.py — TXTperpus search page (Streamlit app)
# (non-code residue removed from the hosting page header:
#  "faizhalas's picture / Update Home.py / 9875a65 verified")
# Import libraries
import streamlit as st
import pandas as pd
import numpy as np
import re
from eldar import Query
import altair as alt
from itertools import combinations
from collections import Counter
# === Streamlit page configuration (must run before any other st.* call) ===
_page_config = {
    "page_title": "TXTperpus",
    "page_icon": "https://github.com/faizhalas/Search4All/blob/main/images/logo.png?raw=true",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_page_config)
# Connect to the Google Sheet that holds the article database.
@st.cache_resource(ttl=3600 * 3)  # BUG FIX: the '@' was missing, so the decorator was a no-op and the sheet was re-downloaded on every rerun
def connect_gsheet():
    """Download the journal sheet as CSV and build the combined full-text column.

    Returns
    -------
    pandas.DataFrame
        All columns read as str, rows in reverse sheet order (newest first),
        missing cells filled with the sentinel string 'NaN', plus a derived
        "Full-text" column joining the main body sections.
    """
    sheet_id = st.secrets.sheet_id
    sheet_name = st.secrets.sheet_journal
    # gviz CSV export endpoint — works without auth for a shared sheet.
    url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"
    df = pd.read_csv(url, dtype=str, header=0)
    # Default RangeIndex, so sort_index(ascending=False) simply reverses the rows.
    df = df.sort_index(ascending=False).fillna('NaN')
    # "Full-text" backs the Full-text option in the search multiselect.
    df["Full-text"] = df[["Abstract (en)", "Introduction", "Method", "Result & Discussion", "Conclusion"]].agg(" - ".join, axis=1)
    return df

df = connect_gsheet()
#st.write(df)
# Swap the 'NaN' sentinel for a human-readable placeholder.
df = df.replace("NaN", "Tidak tersedia")

# --- Page header ---
st.title('TXTperpus: Pencarian')
st.caption(f"Eksplorasi dari **{df.shape[0]}** artikel yang dihimpun oleh TXTperpus.")

search_col, field_col = st.columns([6, 4])

# Search bar: free text, boolean operators handled later by eldar.
text_search = search_col.text_input(
    "Cari berdasarkan penulis, judul, hingga full-text. Mendukung operator boolean.",
    help="Silahkan munggunakan OR, AND, dan NOT dalam huruf kapital. Dapat menggunakan tanda kurung '()' dan asterisk '*' bila dibutuhkan.",
)

# Get keywords from search bar
#keyword_list_j = [keyword.strip() for keyword in text_search.split(";")]

# Columns a query may be matched against; "Full-text" is the derived column.
part_opt = ["Title", "Abstract (en)", "Abstract (id)", "Keywords", "Introduction", "Method", "Result", "Discussion", "Result & Discussion", "Conclusion", "Full-text"]
search_opt = field_col.multiselect(
    "Pilih kolom",
    part_opt,
    ["Title", "Abstract (en)", "Keywords"],
)
try:
    @st.cache_data
    def search_data(text_search, search_opt):
        """Return the rows of the article table matching an eldar boolean query.

        text_search -- raw query string from the search box
        search_opt  -- list of column names to match against
        """
        # BUG FIX: work on a copy — the original mutated the module-level
        # frame (adding __combined__/df_result) from inside a cached function.
        key_df_j = df.copy()
        key_df_j["__combined__"] = key_df_j[search_opt].fillna("").agg(" ".join, axis=1)
        # eldar raises (IndexError) on an empty/invalid query; the enclosing
        # except turns that into the "start typing" hint below.
        query = Query(text_search, ignore_case=True)
        key_df_j["df_result"] = key_df_j["__combined__"].apply(query)
        res_df = key_df_j[key_df_j["df_result"]].copy()
        return res_df

    key_df_j = search_data(text_search, search_opt)
    #st.write(key_df_j)

    # creating result
    if text_search:
        st.caption(f"Kami menemukan **{key_df_j.shape[0]}** artikel yang sesuai dengan apa yang dicari.")
        cl1, cl2 = st.columns([7, 3])
        selection = cl1.radio("Pilih menu", ("Hasil Pencarian", "Analisis Hasil"), horizontal=True)

        with cl2.popover("Unduh Hasil Pencarian"):
            # Column checkboxes laid out in 3 columns; drop the trailing
            # "Full-text" option (derived helper column).
            options = part_opt[:-1]
            cols = st.columns(3)
            selected = []
            for i, opt in enumerate(options):
                if cols[i % 3].checkbox(opt):
                    selected.append(opt)
            st.write("Selected:", selected)

            @st.cache_data
            def convert_for_download(dfs):
                return dfs.to_csv(index=False).encode("utf-8")

            # BUG FIX: this button previously exported a DataFrame of
            # np.random.randn placeholder noise. Export the actual search
            # results instead, restricted to the checked columns (or all
            # real columns when nothing is checked).
            export_cols = [c for c in (selected or options) if c in key_df_j.columns]
            csv = convert_for_download(key_df_j[export_cols])
            st.download_button(
                label="Download CSV",
                data=csv,
                file_name="txtperpus.csv",
                mime="text/csv",
                icon=":material/download:",
            )

        st.divider()

        if selection == "Hasil Pencarian":
            # One bordered card per matching article.
            for _, row in key_df_j.reset_index().iterrows():
                with st.container(border=True):
                    col1, col2 = st.columns([7, 3])
                    col1.markdown(f"**{row['Title'].strip()}**")
                    col2.caption(f"**{row['Journal'].strip()}, {row['Volume'].strip()} ({row['Issue'].strip()}) - {row['Year'].strip()}**")
                    st.markdown(f"*{row['Authors'].strip()}*")
                    btn1, btn2, btn3, btn4 = st.columns(4)
                    popover1 = btn1.popover("Abstrak", icon="🔤", help="Klik untuk membaca abstrak dalam Bahasa Indonesia", use_container_width=False)
                    # BUG FIX: missing values were replaced with "Tidak tersedia"
                    # earlier, so the old plain-truthiness test was always True
                    # and the fallback text never showed.
                    popover1.write(row["Abstract (id)"].strip() if row['Abstract (id)'] not in ("", "Tidak tersedia") else "Abstrak tidak tersedia.")
                    popover2 = btn2.popover("Abstract", icon="🔤", help="Klik untuk membaca abstrak dalam Bahasa Inggris", use_container_width=False)
                    popover2.write(row['Abstract (en)'].strip() if row['Abstract (en)'] not in ("", "Tidak tersedia") else "Abstrak tidak tersedia.")
                    btn3.link_button("Baca Artikel", f"https://faizhalas-perpusdb.hf.space/reader?art={row['ID'].strip()}", icon="📄")
                    title = row['Title'].strip()
                    title_query = title.replace(" ", "+")
                    scholar_url = f"https://scholar.google.com/scholar?hl=id&as_sdt=0%2C5&q={title_query}"
                    btn4.link_button("Sumber", scholar_url, icon="🌐")
        else:
            # --- Analisis Hasil: aggregate charts over the result set ---
            st.write("Jurnal")
            jrnl_counts = key_df_j["Journal"].value_counts().sort_index()
            jrnl_counts_df = pd.DataFrame({"Journal": jrnl_counts.index, "Count": jrnl_counts.values})
            jrnl_counts_df = jrnl_counts_df.set_index("Journal")

            st.write("Tahun")
            year_counts = key_df_j["Year"].value_counts().sort_index()
            year_counts_df = pd.DataFrame({"Year": year_counts.index, "Count": year_counts.values})
            year_counts_df = year_counts_df.set_index("Year")

            # One article can list several institutions, comma-separated.
            institutions = (
                key_df_j["Institution"]
                .str.split(",")
                .explode()
                .str.strip()
            )
            st.write("Institusi")
            inst_counts = institutions.value_counts().reset_index()
            inst_counts.columns = ["Institution", "Count"]
            # Pie chart of institution frequencies.
            chart_ins = (
                alt.Chart(inst_counts)
                .mark_arc()
                .encode(
                    theta="Count:Q",
                    color="Institution:N",
                    tooltip=["Institution", "Count"],
                )
            )

            author = (
                key_df_j["Authors"]
                .str.split(",")
                .explode()
                .str.strip()
            )
            st.write("Author")
            aut_counts = author.value_counts().reset_index()
            aut_counts.columns = ["Authors", "Count"]
            # Horizontal bar chart, most frequent author first.
            chart_aut = (
                alt.Chart(aut_counts)
                .mark_bar()
                .encode(
                    x="Count:Q",
                    y=alt.Y("Authors:N", sort='-x'),
                )
            )

            # Third-party deps only needed on this tab; keep the import local.
            from wordcloud import WordCloud
            import matplotlib.pyplot as plt

            # Keywords are semicolon-separated in the sheet.
            keywords = (
                key_df_j["Keywords"]
                .str.split(";")
                .explode()
                .str.strip()
            )
            keyword_counts = keywords.value_counts().to_dict()
            wordcloud = WordCloud(
                width=500,
                height=500,
                background_color="white"
            ).generate_from_frequencies(keyword_counts)
            fig, ax = plt.subplots(figsize=(10, 5))
            ax.imshow(wordcloud, interpolation="bilinear")
            ax.axis("off")

            st.write(" ")

            # Heatmap of institutions that co-occur on the same article.
            pairs = []
            for inst_cell in key_df_j["Institution"]:
                cities = [c.strip() for c in inst_cell.replace(";", ",").split(",")]
                for inst_a, inst_b in combinations(cities, 2):
                    # Sort so (A, B) and (B, A) count as the same pair.
                    pairs.append(tuple(sorted([inst_a, inst_b])))
            counter = Counter(pairs)
            df_pairs = pd.DataFrame(
                [(a, b, count) for (a, b), count in counter.items()],
                columns=["Institusi 1", "Institusi 2", "Jumlah"]
            )
            heatmap = (
                alt.Chart(df_pairs)
                .mark_rect()
                .encode(
                    x=alt.X("Institusi 1:N", sort=None),
                    y=alt.Y("Institusi 2:N", sort=None),
                    color=alt.Color("Jumlah:Q", scale=alt.Scale(scheme="blues")),
                    tooltip=["Institusi 1", "Institusi 2", "Jumlah"],
                )
            )

            # Two-column dashboard layout.
            col1, col2 = st.columns(2)
            with col1.container(border=True, gap="medium", height=520):
                st.bar_chart(jrnl_counts_df, use_container_width=True, horizontal=True, width=500, height=500)
            with col2.container(border=True, gap="medium", height=520):
                st.line_chart(year_counts_df, use_container_width=True, width=500, height=500)
            with col1.container(border=True, gap="medium", height=520):
                st.altair_chart(chart_ins, use_container_width=True)
            with col2.container(border=True, gap="medium", height=520):
                st.altair_chart(chart_aut, use_container_width=True)
            with col1.container(border=True, gap="medium", height=520):
                st.pyplot(fig)
            with col2.container(border=True, gap="medium", height=520):
                st.altair_chart(heatmap, use_container_width=True)
except IndexError:
    # eldar's parser raises IndexError on an empty query string — treat it
    # as "nothing typed yet" rather than an error.
    st.write("Mulai dengan ketikan kata kunci pada kotak pencarian.")