import os from pathlib import Path # Pastikan Streamlit & HF cache menulis ke /tmp (selalu writable di Spaces) os.environ.setdefault("HOME", "/tmp") os.environ.setdefault("STREAMLIT_USER_SETTINGS_DIR", "/tmp/.streamlit") os.environ.setdefault("HF_HOME", "/tmp/.cache/huggingface") os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", "/tmp/.cache/sentence-transformers") # TRANSFORMERS_CACHE deprecated; HF memakainya dari HF_HOME -> boleh dihapus # os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/.cache/huggingface/transformers") os.environ["TOKENIZERS_PARALLELISM"] = "false" os.environ["STREAMLIT_BROWSER_GATHERUSAGESTATS"] = "false" os.environ["STREAMLIT_SERVER_ADDRESS"] = "0.0.0.0" os.environ["STREAMLIT_SERVER_PORT"] = os.environ.get("PORT", "7860") # Buat folder-foldernya for p in ["/tmp/.streamlit", "/tmp/.cache/huggingface", "/tmp/.cache/sentence-transformers"]: Path(p).mkdir(parents=True, exist_ok=True) # ---- END PATCH ---- import streamlit as st # Sekarang baru import streamlit import streamlit as st import faiss import pickle from sentence_transformers import SentenceTransformer import pandas as pd # Konfigurasi MODEL_NAME = "Qwen/Qwen3-Embedding-0.6B" # Get absolute path for data directory (independent from maintenance_web) SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) INDEX_DIR = os.path.join(SCRIPT_DIR, "data") @st.cache_resource(show_spinner=True) def load_model(): """Load embedding model""" # Model akan di-cache otomatis model = SentenceTransformer(MODEL_NAME) return model @st.cache_resource def load_index(): """Load FAISS index and metadata""" index_path = os.path.join(INDEX_DIR, "skripsi.faiss") metadata_path = os.path.join(INDEX_DIR, "metadata.pkl") if not os.path.exists(index_path): st.error(f"Index not found: {index_path}") return None, None if not os.path.exists(metadata_path): st.error(f"Metadata not found: {metadata_path}") return None, None index = faiss.read_index(index_path) with open(metadata_path, 'rb') as f: metadata = pickle.load(f) return index, metadata def search(query, model, index, metadata, top_k=10): """Perform semantic search""" # Generate query embedding query_embedding = model.encode([query]) # Search distances, indices = index.search(query_embedding, top_k) # Get data list from metadata data_list = metadata.get('data', []) # Format results results = [] for i, (dist, idx) in enumerate(zip(distances[0], indices[0])): if idx < len(data_list): meta = data_list[idx] # Combine pembimbing info pembimbing = meta.get('nama_pembimbing', 'N/A') gelar_depan = meta.get('gelar_depan_pembimbing', '') gelar_belakang = meta.get('gelar_belakang_pembimbing', '') if gelar_depan or gelar_belakang: pembimbing = f"{gelar_depan} {pembimbing}, {gelar_belakang}".strip(', ') results.append({ 'Rank': i + 1, 'Score': f"{dist:.4f}", 'Judul': meta.get('judul', 'N/A'), 'NIM': meta.get('nim', 'N/A'), 'Nama': meta.get('nama', 'N/A'), 'Pembimbing': pembimbing, 'Tahun': meta.get('tahun', 'N/A'), 'Semester': meta.get('semester', 'N/A') }) return results # Streamlit UI st.set_page_config(page_title="Semantic Search - Skripsi UNIKOM", layout="wide") st.title("🔍 Semantic Search - Database Skripsi Prodi Teknik Informatika UNIKOM") st.markdown("*Pencarian semantik berdasarkan kemiripan makna judul skripsi*") st.markdown("---") # Sidebar with st.sidebar: st.header("⚙️ Settings") top_k = st.slider("Number of results", min_value=5, max_value=50, value=10, step=5) st.markdown("---") st.markdown("### 📊 Model Info") st.info(f""" **Model**: {MODEL_NAME} **Index**: {INDEX_DIR} """) # Load resources try: model = load_model() index, metadata = load_index() if index is None or metadata is None: st.error("Failed to load index or metadata") st.stop() st.success(f"✅ Model loaded | Index: {index.ntotal} vectors | Dimension: {index.d}") except Exception as e: st.error(f"Error loading resources: {e}") st.stop() # Search interface st.markdown("### 💬 Enter your search query") query = st.text_input("Search Query", placeholder="e.g., machine learning, web application, sistem informasi...", label_visibility="collapsed") if st.button("🔍 Search", type="primary") or query: if query.strip(): with st.spinner("Searching..."): results = search(query, model, index, metadata, top_k) st.markdown(f"### 📋 Found {len(results)} results") # Display as dataframe if results: df = pd.DataFrame(results) st.dataframe(df, width="stretch", hide_index=True) # Detailed view st.markdown("---") st.markdown("### 📖 Detailed Results") for result in results: with st.expander(f"#{result['Rank']} - {result['Judul'][:100]}... (Score: {result['Score']})"): col1, col2 = st.columns(2) with col1: st.markdown(f"**NIM**: {result['NIM']}") st.markdown(f"**Nama**: {result['Nama']}") st.markdown(f"**Pembimbing**: {result['Pembimbing']}") with col2: st.markdown(f"**Tahun**: {result['Tahun']}") st.markdown(f"**Semester**: {result['Semester']}") st.markdown(f"**Judul Lengkap**: {result['Judul']}") else: st.warning("No results found") else: st.warning("Please enter a search query") # Footer st.markdown("---") st.markdown("""
Galih Hermawan | Akabot Research Group
Prodi Teknik Informatika | Universitas Komputer Indonesia
Powered by Qwen3 Embedding Model