|
|
import os |
|
|
from pathlib import Path |
|
|
|
|
|
|
|
|
# Hugging Face Spaces (and similar read-only containers) only allow writes
# under /tmp, so point every cache/config location there before any library
# reads them. setdefault keeps explicitly provided environment values intact.
os.environ.setdefault("HOME", "/tmp")
os.environ.setdefault("STREAMLIT_USER_SETTINGS_DIR", "/tmp/.streamlit")
os.environ.setdefault("HF_HOME", "/tmp/.cache/huggingface")
os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", "/tmp/.cache/sentence-transformers")

# Hard overrides for the container runtime (no setdefault: these must win).
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# BUG FIX: Streamlit derives env names from config keys with underscores at
# camelCase boundaries, so browser.gatherUsageStats maps to
# STREAMLIT_BROWSER_GATHER_USAGE_STATS; the previous
# STREAMLIT_BROWSER_GATHERUSAGESTATS spelling was silently ignored.
os.environ["STREAMLIT_BROWSER_GATHER_USAGE_STATS"] = "false"
os.environ["STREAMLIT_SERVER_ADDRESS"] = "0.0.0.0"
os.environ["STREAMLIT_SERVER_PORT"] = os.environ.get("PORT", "7860")

# Create the writable cache/config directories up front, deriving them from
# the env vars set above so the two lists can never drift apart (previously
# the paths were duplicated as hard-coded literals).
for p in (
    os.environ["STREAMLIT_USER_SETTINGS_DIR"],
    os.environ["HF_HOME"],
    os.environ["SENTENCE_TRANSFORMERS_HOME"],
):
    Path(p).mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
import streamlit as st |
|
|
|
|
|
|
|
|
|
|
|
import streamlit as st |
|
|
import faiss |
|
|
import pickle |
|
|
from sentence_transformers import SentenceTransformer |
|
|
import pandas as pd |
|
|
|
|
|
|
|
|
# Embedding model used at query time; presumably the same model that built
# the index — the two must match or the vector space is inconsistent (TODO
# confirm against the index-building script).
MODEL_NAME = "Qwen/Qwen3-Embedding-0.6B"

# Resolve the data directory relative to this script so the app works
# regardless of the current working directory.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
INDEX_DIR = os.path.join(SCRIPT_DIR, "data")
|
|
|
|
|
@st.cache_resource(show_spinner=True)
def load_model():
    """Instantiate the sentence-embedding model, cached for the app's lifetime."""
    return SentenceTransformer(MODEL_NAME)
|
|
|
|
|
@st.cache_resource
def load_index():
    """Load the FAISS index and its metadata from INDEX_DIR.

    Returns:
        (index, metadata) on success, or (None, None) — with a Streamlit
        error rendered — when either file is missing.
    """
    index_path = os.path.join(INDEX_DIR, "skripsi.faiss")
    metadata_path = os.path.join(INDEX_DIR, "metadata.pkl")

    # Bail out early (with a visible error) if either artifact is absent.
    for label, path in (("Index", index_path), ("Metadata", metadata_path)):
        if not os.path.exists(path):
            st.error(f"{label} not found: {path}")
            return None, None

    index = faiss.read_index(index_path)

    # NOTE(review): pickle.load is only acceptable because metadata.pkl ships
    # with the app; never point this at untrusted input.
    with open(metadata_path, 'rb') as f:
        metadata = pickle.load(f)

    return index, metadata
|
|
|
|
|
def search(query, model, index, metadata, top_k=10):
    """Perform semantic search over the FAISS index.

    Args:
        query: Free-text search string.
        model: Object with a SentenceTransformer-style ``encode`` method.
        index: FAISS index; ``search(embeddings, k)`` returns
            ``(distances, indices)``.
        metadata: Dict holding a ``'data'`` list of per-thesis record dicts.
        top_k: Number of nearest neighbours to request.

    Returns:
        List of result dicts (Rank, Score, Judul, NIM, Nama, Pembimbing,
        Tahun, Semester) in retrieval order.
    """
    query_embedding = model.encode([query])

    distances, indices = index.search(query_embedding, top_k)

    data_list = metadata.get('data', [])

    results = []
    for dist, idx in zip(distances[0], indices[0]):
        # BUG FIX: FAISS pads `indices` with -1 when fewer than top_k vectors
        # exist; the old `idx < len(data_list)` test let -1 through and
        # silently returned the LAST record via negative indexing.
        if 0 <= idx < len(data_list):
            meta = data_list[idx]

            # Compose the supervisor's display name with academic titles,
            # e.g. "Dr. Citra, M.T."; strip() drops leftover separators when
            # either title is absent.
            pembimbing = meta.get('nama_pembimbing', 'N/A')
            gelar_depan = meta.get('gelar_depan_pembimbing', '')
            gelar_belakang = meta.get('gelar_belakang_pembimbing', '')
            if gelar_depan or gelar_belakang:
                pembimbing = f"{gelar_depan} {pembimbing}, {gelar_belakang}".strip(', ')

            results.append({
                # len(results)+1 keeps ranks contiguous even when padded/-1
                # hits are skipped (enumerate-based ranks left gaps).
                'Rank': len(results) + 1,
                'Score': f"{dist:.4f}",
                'Judul': meta.get('judul', 'N/A'),
                'NIM': meta.get('nim', 'N/A'),
                'Nama': meta.get('nama', 'N/A'),
                'Pembimbing': pembimbing,
                'Tahun': meta.get('tahun', 'N/A'),
                'Semester': meta.get('semester', 'N/A')
            })

    return results
|
|
|
|
|
|
|
|
# ---- Page chrome: title, tagline, and sidebar controls -----------------
st.set_page_config(page_title="Semantic Search - Skripsi UNIKOM", layout="wide")

st.title("π Semantic Search - Database Skripsi Prodi Teknik Informatika UNIKOM")
st.markdown("*Pencarian semantik berdasarkan kemiripan makna judul skripsi*")
st.markdown("---")

# Sidebar: user-tunable result count plus static model/index info.
with st.sidebar:
    st.header("βοΈ Settings")
    # top_k is read later by the search trigger below.
    top_k = st.slider("Number of results", min_value=5, max_value=50, value=10, step=5)

    st.markdown("---")
    st.markdown("### π Model Info")
    st.info(f"""
    **Model**: {MODEL_NAME}
    **Index**: {INDEX_DIR}
    """)
|
|
|
|
|
|
|
|
# ---- Load model + index once (both loaders are @st.cache_resource). ----
# Any failure stops the script so the UI below never runs half-initialised.
try:
    model = load_model()
    index, metadata = load_index()

    # load_index reports missing files by returning (None, None) after
    # rendering its own st.error; stop with a summary error here.
    if index is None or metadata is None:
        st.error("Failed to load index or metadata")
        st.stop()

    st.success(f"β Model loaded | Index: {index.ntotal} vectors | Dimension: {index.d}")

except Exception as e:
    # Broad catch is deliberate at this top-level boundary: surface the
    # failure to the user and halt rendering.
    st.error(f"Error loading resources: {e}")
    st.stop()
|
|
|
|
|
|
|
|
# ---- Query input and results rendering ---------------------------------
st.markdown("### π¬ Enter your search query")
query = st.text_input("Search Query", placeholder="e.g., machine learning, web application, sistem informasi...", label_visibility="collapsed")

# Trigger on the button OR on a non-empty text box, so pressing Enter in
# the input also searches without clicking the button.
if st.button("π Search", type="primary") or query:
    if query.strip():
        with st.spinner("Searching..."):
            results = search(query, model, index, metadata, top_k)

        st.markdown(f"### π Found {len(results)} results")

        if results:
            # Compact table view of all hits.
            df = pd.DataFrame(results)
            st.dataframe(df, width="stretch", hide_index=True)

            # Expandable per-hit detail cards (full title, student, advisor).
            st.markdown("---")
            st.markdown("### π Detailed Results")
            for result in results:
                with st.expander(f"#{result['Rank']} - {result['Judul'][:100]}... (Score: {result['Score']})"):
                    col1, col2 = st.columns(2)
                    with col1:
                        st.markdown(f"**NIM**: {result['NIM']}")
                        st.markdown(f"**Nama**: {result['Nama']}")
                        st.markdown(f"**Pembimbing**: {result['Pembimbing']}")
                    with col2:
                        st.markdown(f"**Tahun**: {result['Tahun']}")
                        st.markdown(f"**Semester**: {result['Semester']}")
                        st.markdown(f"**Judul Lengkap**: {result['Judul']}")
        else:
            st.warning("No results found")
    else:
        # Button pressed with an empty / whitespace-only query.
        st.warning("Please enter a search query")
|
|
|
|
|
|
|
|
# ---- Footer (author credit, raw HTML so it can be centered) ------------
st.markdown("---")
st.markdown("""
<div style='text-align: center;'>
<p><a href='https://galih.eu'>Galih Hermawan</a> | Akabot Research Group</p>
<p>Prodi Teknik Informatika | Universitas Komputer Indonesia</p>
<p>Powered by Qwen3 Embedding Model</p>
</div>
""", unsafe_allow_html=True)
|
|
|