File size: 1,395 Bytes
532f1f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import streamlit as st
from config import EMBEDDINGS_DIR

from embeddings.search import run_search
from embeddings.cluster import run_clustering_pipeline
from embeddings.embedder import (
    initialize_embedding_model,
    initialize_chroma,
    run_pipeline,
)

# BASIC STREAMLIT CONFIGURATION
# The same title is used for the page settings and for the visible heading.
PAGE_TITLE = "Semantic Clusters Dashboard"

st.set_page_config(page_title=PAGE_TITLE, page_icon="🪐", layout="wide")

st.title(PAGE_TITLE)
st.markdown("Visualize document clusters with interactive semantic search.")


@st.cache_resource
def get_embeddings_model():
    """Return the embedding model created by ``initialize_embedding_model``.

    The function is wrapped in ``st.cache_resource``, so the returned object
    is cached by Streamlit instead of being rebuilt on every call.
    """
    model = initialize_embedding_model()
    return model


@st.cache_resource
def get_vectordb():
    """Return the vector database handle produced by ``initialize_chroma``.

    ``initialize_chroma`` receives the embedding model from
    ``get_embeddings_model()`` together with ``EMBEDDINGS_DIR``. The result
    is cached by Streamlit through ``st.cache_resource``.
    """
    return initialize_chroma(get_embeddings_model(), EMBEDDINGS_DIR)


# Module-level handles handed to the search and clustering tabs below.
embedding_model = get_embeddings_model()
vectordb = get_vectordb()

# MAIN INTERFACE
# One tab per feature; the labels are shown to the user exactly as written.
tab_labels = ["Ingestion & Embedding", "3D Clusters", "Semantic Search "]
tab_ingestion, tab_clusters, tab_search = st.tabs(tab_labels)

with tab_ingestion:
    run_pipeline(force_run=False)

with tab_search:
    run_search(embedding_model=embedding_model, vectordb=vectordb)

with tab_clusters:
    st.header("3D Clusters View")
    # Clustering only runs when the user clicks the button.
    generate_clicked = st.button("🌀 Generate clusters")
    if generate_clicked:
        with st.spinner("Generating clusters..."):
            run_clustering_pipeline(
                embedding_model=embedding_model,
                vectordb=vectordb,
            )
        st.success("Clusters!")