"""TXTperpus search page.

Streamlit app that loads article metadata from a Google Sheet, offers a
boolean (eldar) search over user-selected text columns, and renders the
matches either as an article list or as a set of summary charts.
"""

# --- imports ----------------------------------------------------------------
from collections import Counter
from itertools import combinations
from urllib.parse import quote_plus

import altair as alt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from eldar import Query
from wordcloud import WordCloud

# === config ===
st.set_page_config(
    page_title="TXTperpus",
    page_icon="https://github.com/faizhalas/Search4All/blob/main/images/logo.png?raw=true",
    layout="wide",
    initial_sidebar_state="collapsed",
)


# --- data loading -----------------------------------------------------------
@st.cache_resource(ttl=3600 * 3)  # BUG FIX: the "@" was missing, so nothing was cached
def connect_gsheet():
    """Fetch the article sheet from Google Sheets as a string DataFrame.

    Cached for 3 hours. Missing cells are filled with the sentinel "NaN"
    (replaced with a user-facing message below), and the section columns are
    concatenated into a single searchable "Full-text" column.
    """
    sheet_id = st.secrets.sheet_id
    sheet_name = st.secrets.sheet_journal
    url = (
        f"https://docs.google.com/spreadsheets/d/{sheet_id}"
        f"/gviz/tq?tqx=out:csv&sheet={sheet_name}"
    )
    df = pd.read_csv(url, dtype=str, header=0)
    df = df.sort_index(ascending=False).fillna('NaN')
    df["Full-text"] = df[
        ["Abstract (en)", "Introduction", "Method", "Result & Discussion", "Conclusion"]
    ].agg(" - ".join, axis=1)
    return df


df = connect_gsheet()
df = df.replace("NaN", "Tidak tersedia")

# --- header & search bar ----------------------------------------------------
st.title('TXTperpus: Pencarian')
st.caption(f"Eksplorasi dari **{df.shape[0]}** artikel yang dihimpun oleh TXTperpus.")

c1, c2 = st.columns([6, 4])

text_search = c1.text_input(
    "Cari berdasarkan penulis, judul, hingga full-text. Mendukung operator boolean.",
    help=(
        "Silahkan menggunakan OR, AND, dan NOT dalam huruf kapital. "
        "Dapat menggunakan tanda kurung '()' dan asterisk '*' bila dibutuhkan."
    ),
)

# Columns the user may search over; "Full-text" is last on purpose (it is
# excluded from the download-column checkboxes below).
part_opt = [
    "Title", "Abstract (en)", "Abstract (id)", "Keywords", "Introduction",
    "Method", "Result", "Discussion", "Result & Discussion", "Conclusion",
    "Full-text",
]
search_opt = c2.multiselect(
    "Pilih kolom", part_opt, ["Title", "Abstract (en)", "Keywords"]
)

try:
    @st.cache_data
    def search_data(text_search, search_opt):
        """Return the rows of `df` matching the boolean query over `search_opt`."""
        # BUG FIX: work on a copy — the original aliased the module-level
        # (cached) DataFrame and mutated it by adding helper columns.
        key_df_j = df.copy()
        key_df_j["__combined__"] = key_df_j[search_opt].fillna("").agg(" ".join, axis=1)
        query = Query(text_search, ignore_case=True)
        key_df_j["df_result"] = key_df_j["__combined__"].apply(query)
        return key_df_j[key_df_j["df_result"]].copy()

    key_df_j = search_data(text_search, search_opt)

    if text_search:
        st.caption(
            f"Kami menemukan **{key_df_j.shape[0]}** artikel yang sesuai dengan apa yang dicari."
        )
        cl1, cl2 = st.columns([7, 3])
        selection = cl1.radio(
            "Pilih menu", ("Hasil Pencarian", "Analisis Hasil"), horizontal=True
        )

        with cl2.popover("Unduh Hasil Pencarian"):
            # Column checkboxes laid out in 3 columns ("Full-text" excluded).
            options = part_opt[:-1]
            cols = st.columns(3)
            selected = [
                opt for i, opt in enumerate(options) if cols[i % 3].checkbox(opt)
            ]

            @st.cache_data
            def convert_for_download(dfs):
                """Serialize a DataFrame to UTF-8 CSV bytes for st.download_button."""
                return dfs.to_csv(index=False).encode("utf-8")

            # BUG FIX: previously this exported a random placeholder frame
            # (np.random.randn) and ignored `selected`. Export the actual
            # search results, restricted to the chosen columns; with no
            # selection, export everything except the internal helper columns.
            if selected:
                export_df = key_df_j[[c for c in selected if c in key_df_j.columns]]
            else:
                export_df = key_df_j.drop(
                    columns=["__combined__", "df_result"], errors="ignore"
                )
            csv = convert_for_download(export_df)
            st.download_button(
                label="Download CSV",
                data=csv,
                file_name="txtperpus.csv",
                mime="text/csv",
                icon=":material/download:",
            )

        st.divider()

        if selection == "Hasil Pencarian":
            # One bordered card per matching article.
            for _, row in key_df_j.reset_index().iterrows():
                with st.container(border=True):
                    col1, col2 = st.columns([7, 3])
                    col1.markdown(f"**{row['Title'].strip()}**")
                    col2.caption(
                        f"**{row['Journal'].strip()}, {row['Volume'].strip()} "
                        f"({row['Issue'].strip()}) - {row['Year'].strip()}**"
                    )
                    st.markdown(f"*{row['Authors'].strip()}*")

                    btn1, btn2, btn3, btn4 = st.columns(4)

                    # BUG FIX: missing abstracts were filled with the string
                    # "Tidak tersedia", which is truthy, so the fallback
                    # message could never appear. Compare explicitly instead.
                    popover1 = btn1.popover(
                        "Abstrak", icon="🔤",
                        help="Klik untuk membaca abstrak dalam Bahasa Indonesia",
                        use_container_width=False,
                    )
                    abs_id = row["Abstract (id)"].strip()
                    popover1.write(
                        abs_id if abs_id and abs_id != "Tidak tersedia"
                        else "Abstrak tidak tersedia."
                    )

                    popover2 = btn2.popover(
                        "Abstract", icon="🔤",
                        help="Klik untuk membaca abstrak dalam Bahasa Inggris",
                        use_container_width=False,
                    )
                    abs_en = row['Abstract (en)'].strip()
                    popover2.write(
                        abs_en if abs_en and abs_en != "Tidak tersedia"
                        else "Abstrak tidak tersedia."
                    )

                    btn3.link_button(
                        "Baca Artikel",
                        f"https://faizhalas-perpusdb.hf.space/reader?art={row['ID'].strip()}",
                        icon="📄",
                    )

                    # BUG FIX: titles with '&', '?', '#' etc. broke the Scholar
                    # link under plain space→'+' replacement; percent-encode.
                    title_query = quote_plus(row['Title'].strip())
                    scholar_url = (
                        f"https://scholar.google.com/scholar?hl=id&as_sdt=0%2C5&q={title_query}"
                    )
                    btn4.link_button("Sumber", scholar_url, icon="🌐")

        else:
            # --- "Analisis Hasil": aggregate counts and charts --------------
            st.write("Jurnal")
            jrnl_counts = key_df_j["Journal"].value_counts().sort_index()
            jrnl_counts_df = pd.DataFrame(
                {"Journal": jrnl_counts.index, "Count": jrnl_counts.values}
            ).set_index("Journal")

            st.write("Tahun")
            year_counts = key_df_j["Year"].value_counts().sort_index()
            year_counts_df = pd.DataFrame(
                {"Year": year_counts.index, "Count": year_counts.values}
            ).set_index("Year")

            st.write("Institusi")
            # Articles may list several comma-separated institutions.
            institutions = (
                key_df_j["Institution"]
                .str.split(",")
                .explode()
                .str.strip()
            )
            inst_counts = institutions.value_counts().reset_index()
            inst_counts.columns = ["Institution", "Count"]
            chart_ins = (
                alt.Chart(inst_counts)
                .mark_arc()  # pie/donut: slice size = article count
                .encode(
                    theta="Count:Q",
                    color="Institution:N",
                    tooltip=["Institution", "Count"],
                )
            )

            st.write("Author")
            author = (
                key_df_j["Authors"]
                .str.split(",")
                .explode()
                .str.strip()
            )
            aut_counts = author.value_counts().reset_index()
            aut_counts.columns = ["Authors", "Count"]
            chart_aut = (
                alt.Chart(aut_counts)
                .mark_bar()
                .encode(
                    x="Count:Q",
                    y=alt.Y("Authors:N", sort='-x'),  # most frequent first
                )
            )

            # Keyword word cloud (keywords are semicolon-separated).
            keywords = (
                key_df_j["Keywords"]
                .str.split(";")
                .explode()
                .str.strip()
            )
            keyword_counts = keywords.value_counts().to_dict()
            wordcloud = WordCloud(
                width=500, height=500, background_color="white"
            ).generate_from_frequencies(keyword_counts)
            fig, ax = plt.subplots(figsize=(10, 5))
            ax.imshow(wordcloud, interpolation="bilinear")
            ax.axis("off")
            st.write(" ")

            # Institution co-occurrence heatmap: count unordered pairs of
            # institutions appearing on the same article.
            # BUG FIX: the pair loop previously reused c1/c2, clobbering the
            # page-layout column handles created at the top of the page.
            pairs = []
            for inst_row in key_df_j["Institution"]:
                members = [m.strip() for m in inst_row.replace(";", ",").split(",")]
                for left, right in combinations(members, 2):
                    pairs.append(tuple(sorted([left, right])))
            counter = Counter(pairs)
            df_pairs = pd.DataFrame(
                [(left, right, count) for (left, right), count in counter.items()],
                columns=["Institusi 1", "Institusi 2", "Jumlah"],
            )
            heatmap = (
                alt.Chart(df_pairs)
                .mark_rect()
                .encode(
                    x=alt.X("Institusi 1:N", sort=None),
                    y=alt.Y("Institusi 2:N", sort=None),
                    color=alt.Color("Jumlah:Q", scale=alt.Scale(scheme="blues")),
                    tooltip=["Institusi 1", "Institusi 2", "Jumlah"],
                )
            )

            # --- two-column chart layout ------------------------------------
            col1, col2 = st.columns(2)
            with col1.container(border=True, gap="medium", height=520):
                st.bar_chart(
                    jrnl_counts_df, use_container_width=True, horizontal=True,
                    width=500, height=500,
                )
            with col2.container(border=True, gap="medium", height=520):
                st.line_chart(
                    year_counts_df, use_container_width=True, width=500, height=500
                )
            with col1.container(border=True, gap="medium", height=520):
                st.altair_chart(chart_ins, use_container_width=True)
            with col2.container(border=True, gap="medium", height=520):
                st.altair_chart(chart_aut, use_container_width=True)
            with col1.container(border=True, gap="medium", height=520):
                st.pyplot(fig)
            with col2.container(border=True, gap="medium", height=520):
                st.altair_chart(heatmap, use_container_width=True)

except IndexError:
    # NOTE(review): eldar appears to raise IndexError while parsing an
    # empty/incomplete query (e.g. before the user types) — confirm; show a
    # gentle prompt instead of a traceback.
    st.write("Mulai dengan ketikan kata kunci pada kotak pencarian.")