File size: 6,297 Bytes
c5ff0a8
87e5b75
 
c869b02
87e5b75
 
 
 
c869b02
 
 
45786d1
87e5b75
 
 
 
c869b02
 
87e5b75
c869b02
 
 
 
c5ff0a8
 
5565009
c5ff0a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c869b02
c5ff0a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5565009
c5ff0a8
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import os
from pathlib import Path

# Point Streamlit and the Hugging Face caches at /tmp (always writable on
# Spaces). setdefault() keeps any value already present in the environment.
for _env_name, _env_fallback in (
    ("HOME", "/tmp"),
    ("STREAMLIT_USER_SETTINGS_DIR", "/tmp/.streamlit"),
    ("HF_HOME", "/tmp/.cache/huggingface"),
    ("SENTENCE_TRANSFORMERS_HOME", "/tmp/.cache/sentence-transformers"),
):
    os.environ.setdefault(_env_name, _env_fallback)
# TRANSFORMERS_CACHE is deprecated (HF derives it from HF_HOME), so it is
# intentionally not set here.

# These settings are always overwritten, whatever the environment held before.
os.environ.update({
    "TOKENIZERS_PARALLELISM": "false",
    "STREAMLIT_BROWSER_GATHERUSAGESTATS": "false",
    "STREAMLIT_SERVER_ADDRESS": "0.0.0.0",
    # Use the PORT variable when present, otherwise fall back to 7860.
    "STREAMLIT_SERVER_PORT": os.environ.get("PORT", "7860"),
})

# Create the default /tmp directories up front.
for _tmp_dir in (
    "/tmp/.streamlit",
    "/tmp/.cache/huggingface",
    "/tmp/.cache/sentence-transformers",
):
    Path(_tmp_dir).mkdir(parents=True, exist_ok=True)
# ---- END PATCH ----

import streamlit as st


# Import Streamlit only now, after the environment variables have been set
import streamlit as st
import faiss
import pickle
from sentence_transformers import SentenceTransformer
import pandas as pd

# Configuration
MODEL_NAME = "Qwen/Qwen3-Embedding-0.6B"

# The data directory is resolved from this file's absolute location
# (independent from maintenance_web), not from the current working directory.
_THIS_FILE = os.path.abspath(__file__)
SCRIPT_DIR = os.path.dirname(_THIS_FILE)
INDEX_DIR = os.path.join(SCRIPT_DIR, "data")

@st.cache_resource(show_spinner=True)
def load_model():
    """Build and return the SentenceTransformer named by ``MODEL_NAME``.

    The function is decorated with ``st.cache_resource`` (spinner enabled),
    so the constructed model object is cached by Streamlit.
    """
    return SentenceTransformer(MODEL_NAME)

@st.cache_resource
def _read_index_files(index_path, metadata_path):
    """Read the FAISS index and the pickled metadata from disk.

    Only this successful read is cached; the existence checks live in
    ``load_index`` so that a failure is never memoized.
    """
    index = faiss.read_index(index_path)

    # NOTE: pickle can execute arbitrary code while loading. This is only
    # acceptable because the metadata file ships with the app; never point
    # this at a file from an untrusted source.
    with open(metadata_path, 'rb') as f:
        metadata = pickle.load(f)

    return index, metadata


def load_index():
    """Load the FAISS index and its metadata from ``INDEX_DIR``.

    Reads ``skripsi.faiss`` and ``metadata.pkl``. The file checks run on
    every call, so a file that was missing earlier is picked up once it
    appears; the actual read is cached by ``_read_index_files``.

    Returns:
        ``(index, metadata)`` on success, or ``(None, None)`` after showing
        an ``st.error`` message when either file is missing.
    """
    index_path = os.path.join(INDEX_DIR, "skripsi.faiss")
    metadata_path = os.path.join(INDEX_DIR, "metadata.pkl")

    for label, path in (("Index", index_path), ("Metadata", metadata_path)):
        # isfile() also rejects a directory that happens to carry the name.
        if not os.path.isfile(path):
            st.error(f"{label} not found: {path}")
            return None, None

    return _read_index_files(index_path, metadata_path)

def search(query, model, index, metadata, top_k=10):
    """Run a semantic search for ``query`` and return formatted result rows.

    Args:
        query: The search text.
        model: Object exposing ``encode(list_of_str)``; its output is passed
            straight to ``index.search``.
        index: Object exposing ``search(embeddings, k)`` that returns a
            ``(distances, indices)`` pair, each indexable by query position.
        metadata: Mapping whose ``'data'`` entry is a list of per-document
            dicts, positionally aligned with the index.
        top_k: Number of neighbours requested from the index.

    Returns:
        A list of dicts with the keys ``Rank``, ``Score``, ``Judul``,
        ``NIM``, ``Nama``, ``Pembimbing``, ``Tahun`` and ``Semester``.
        ``Rank`` is the 1-based position in the raw index result and
        ``Score`` is the value returned by the index formatted to four
        decimals (whether higher or lower is better depends on the index
        metric).
    """
    # Generate the query embedding and look up its nearest neighbours.
    query_embedding = model.encode([query])
    distances, indices = index.search(query_embedding, top_k)

    data_list = metadata.get('data', [])

    results = []
    for i, (dist, idx) in enumerate(zip(distances[0], indices[0])):
        idx = int(idx)
        # FAISS pads the result with -1 when fewer than top_k neighbours
        # exist. A negative index must be rejected explicitly, otherwise
        # data_list[-1] would silently return the last record.
        if not 0 <= idx < len(data_list):
            continue

        meta = data_list[idx]

        # Combine the supervisor's name with the optional academic titles.
        # ``or ''`` keeps a stored None from being rendered as "None".
        pembimbing = meta.get('nama_pembimbing', 'N/A')
        gelar_depan = meta.get('gelar_depan_pembimbing') or ''
        gelar_belakang = meta.get('gelar_belakang_pembimbing') or ''
        if gelar_depan or gelar_belakang:
            # strip(', ') removes the separator left by a missing title.
            pembimbing = f"{gelar_depan} {pembimbing}, {gelar_belakang}".strip(', ')

        results.append({
            'Rank': i + 1,
            'Score': f"{dist:.4f}",
            'Judul': meta.get('judul', 'N/A'),
            'NIM': meta.get('nim', 'N/A'),
            'Nama': meta.get('nama', 'N/A'),
            'Pembimbing': pembimbing,
            'Tahun': meta.get('tahun', 'N/A'),
            'Semester': meta.get('semester', 'N/A'),
        })

    return results

# Streamlit UI: page configuration and header.
st.set_page_config(page_title="Semantic Search - Skripsi UNIKOM", layout="wide")

# The title emoji had been mis-decoded (mojibake); restored to the intended glyph.
st.title("🔍 Semantic Search - Database Skripsi Prodi Teknik Informatika UNIKOM")
st.markdown("*Pencarian semantik berdasarkan kemiripan makna judul skripsi*")
st.markdown("---")

# Sidebar: result-count setting and static model/index information.
with st.sidebar:
    # Emoji in the headings had been mis-decoded (mojibake); restored.
    st.header("⚙️ Settings")
    # top_k is read by the search section further down the script.
    top_k = st.slider("Number of results", min_value=5, max_value=50, value=10, step=5)
    
    st.markdown("---")
    st.markdown("### 📊 Model Info")
    st.info(f"""
    **Model**: {MODEL_NAME}
    **Index**: {INDEX_DIR}
    """)

# Load resources: the embedding model and the FAISS index with its metadata.
# Any failure is reported in the page and the script run is halted.
try:
    model = load_model()
    index, metadata = load_index()
    
    # load_index() signals a missing file by returning (None, None).
    if index is None or metadata is None:
        st.error("Failed to load index or metadata")
        st.stop()
    
    # The check-mark emoji had been mis-decoded (mojibake); restored.
    st.success(f"✅ Model loaded | Index: {index.ntotal} vectors | Dimension: {index.d}")
    
except Exception as e:
    st.error(f"Error loading resources: {e}")
    st.stop()

# Search interface: query box, result table and per-result detail panels.
# Emoji in the labels below had been mis-decoded (mojibake); restored.
st.markdown("### 💬 Enter your search query")
query = st.text_input("Search Query", placeholder="e.g., machine learning, web application, sistem informasi...", label_visibility="collapsed")

# Run a search when the button is pressed or whenever the box holds text.
if st.button("🔍 Search", type="primary") or query:
    if query.strip():
        with st.spinner("Searching..."):
            results = search(query, model, index, metadata, top_k)
        
        st.markdown(f"### 📋 Found {len(results)} results")
        
        # Display as dataframe
        if results:
            df = pd.DataFrame(results)
            st.dataframe(df, width="stretch", hide_index=True)
            
            # Detailed view
            st.markdown("---")
            st.markdown("### 📖 Detailed Results")
            for result in results:
                # Shorten long titles for the expander label; the ellipsis is
                # added only when something was actually cut off.
                title = str(result['Judul'])
                short_title = f"{title[:100]}..." if len(title) > 100 else title
                with st.expander(f"#{result['Rank']} - {short_title} (Score: {result['Score']})"):
                    col1, col2 = st.columns(2)
                    with col1:
                        st.markdown(f"**NIM**: {result['NIM']}")
                        st.markdown(f"**Nama**: {result['Nama']}")
                        st.markdown(f"**Pembimbing**: {result['Pembimbing']}")
                    with col2:
                        st.markdown(f"**Tahun**: {result['Tahun']}")
                        st.markdown(f"**Semester**: {result['Semester']}")
                    st.markdown(f"**Judul Lengkap**: {result['Judul']}")
        else:
            st.warning("No results found")
    else:
        # Button pressed with an empty or whitespace-only query.
        st.warning("Please enter a search query")

# Footer: a horizontal rule followed by a centred credits block.
_FOOTER_HTML = """
<div style='text-align: center;'>
    <p><a href='https://galih.eu'>Galih Hermawan</a> | Akabot Research Group</p>
    <p>Prodi Teknik Informatika | Universitas Komputer Indonesia</p>
    <p>Powered by Qwen3 Embedding Model</p>
</div>
"""

st.markdown("---")
# unsafe_allow_html is required so the raw <div> markup is rendered as HTML.
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)