File size: 3,325 Bytes
50acb99
 
 
 
 
 
 
 
b700e8b
50acb99
 
ff346e8
aa229e6
 
 
 
 
 
 
50acb99
8ad87ed
50acb99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98

# Streamlit app for the frontend
import streamlit as st
from typing import Dict, List
import requests
import json

# Backend API endpoint
API_BASE_URL = "https://chris4k-mdb.hf.space"  # Change this to your deployed backend URL

def fetch_documents(timeout: float = 30.0):
    """Fetch the list of indexed documents from the backend.

    Sends a GET request to ``API_BASE_URL`` and returns whatever the JSON
    response stores under its ``"documents"`` key.

    This is a best-effort call: every failure is printed and reported to the
    caller as an empty list rather than raised, so the page can still render.

    Args:
        timeout: Seconds to wait for the backend before giving up.

    Returns:
        The value of the ``"documents"`` key, or an empty list when the
        backend cannot be reached, the body is not valid JSON, the JSON is
        not an object, or the key is absent.
    """
    # The request itself can fail (DNS, refused connection, timeout); keep it
    # inside the guarded section so a backend outage does not crash the page.
    try:
        response = requests.get(API_BASE_URL, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        print("Failed to reach backend:", exc)
        return []

    # ValueError is the common base of the JSON decode errors raised by
    # ``Response.json()``, including on requests releases that predate
    # ``requests.exceptions.JSONDecodeError``.
    try:
        response_json = response.json()
    except ValueError:
        print("Invalid JSON response:", response.text)
        return []  # Return an empty list if the response is invalid

    # A JSON array or scalar has no ``.get``; treat it like an invalid body.
    if not isinstance(response_json, dict):
        print("Unexpected JSON response:", response.text)
        return []

    return response_json.get("documents", [])



def index_content(doc_type: str, source: str, config: Dict):
    """Submit a document to the backend's ``/index`` endpoint.

    Args:
        doc_type: Value sent as the ``doc_type`` field of the request body.
        source: Value sent as the ``source`` field of the request body.
        config: Mapping sent as the ``config`` field of the request body.

    Returns:
        The backend's response body decoded from JSON.
    """
    endpoint = f"{API_BASE_URL}/index"
    body = dict(doc_type=doc_type, source=source, config=config)
    return requests.post(endpoint, json=body).json()

def delete_document(doc_id: str):
    """Delete a document by its ID.

    Issues a DELETE request to the backend's ``/delete`` endpoint with the
    ID passed as the ``doc_id`` query parameter.

    Args:
        doc_id: Identifier of the document to delete.

    Returns:
        The backend's response body decoded from JSON.
    """
    # Let requests build the query string so that characters such as "&",
    # "#", "+" or spaces inside the ID are percent-encoded instead of
    # corrupting the URL.
    response = requests.delete(
        f"{API_BASE_URL}/delete",
        params={"doc_id": doc_id},
    )
    return response.json()

def search_documents(query: str, top_k: int = 5):
    """Run a search against the backend's ``/search`` endpoint.

    Args:
        query: Value sent as the ``query`` field of the request body.
        top_k: Value sent as the ``top_k`` field of the request body.

    Returns:
        The backend's response body decoded from JSON.
    """
    endpoint = f"{API_BASE_URL}/search"
    body = dict(query=query, top_k=top_k)
    return requests.post(endpoint, json=body).json()

# === Streamlit Frontend ===
# Streamlit re-executes this script from top to bottom on every user
# interaction; the sidebar radio decides which of the three pages is rendered.
st.title("ChromaDB Document Manager 📚")

# Sidebar for Navigation
st.sidebar.header("Navigation")
page = st.sidebar.radio("Go to", ["Home", "Add Document", "Search"])

# --- Home Page: List and Manage Documents ---
if page == "Home":
    st.header("Indexed Documents")
    documents = fetch_documents()

    if not documents:
        st.info("No documents indexed yet.")
    else:
        for position, doc in enumerate(documents):
            doc_id = doc['doc_id']
            with st.expander(f"Document ID: {doc_id}"):
                st.write("**Metadata:**")
                st.json(doc)

                # Delete Button
                # An explicit key that includes the list position keeps the
                # widgets distinct even if two entries share a doc_id.
                if st.button(f"Delete {doc_id}", key=f"delete-{position}-{doc_id}"):
                    # Report failures in the page, like the other two pages
                    # do, instead of letting the exception abort the script.
                    try:
                        result = delete_document(doc_id)
                        st.success(f"Deleted: {result['doc_id']}")
                    except Exception as e:
                        st.error(f"Error: {str(e)}")

# --- Add Document Page ---
elif page == "Add Document":
    st.header("Add a New Document")
    doc_type = st.selectbox("Document Type", ["pdf", "webpage", "manual"])
    source = st.text_input("Source (URL for webpage, file path for PDF, or manual text)")
    config = st.text_area(
        "Configuration (JSON)",
        value=json.dumps({"chunk_size": 1000, "chunk_overlap": 200}, indent=4)
    )

    if st.button("Index Document"):
        if not source.strip():
            # Nothing to index; skip the backend round trip.
            st.warning("Please provide a source.")
        else:
            try:
                config_dict = json.loads(config)
                # index_content expects a mapping; valid JSON such as a list
                # or a bare number would otherwise be forwarded unchanged.
                if not isinstance(config_dict, dict):
                    raise ValueError("Configuration must be a JSON object.")
                result = index_content(doc_type, source, config_dict)
                st.success(f"Document indexed: {result['doc_id']}")
            except Exception as e:
                st.error(f"Error: {str(e)}")

# --- Search Page ---
elif page == "Search":
    st.header("Search Documents")
    query = st.text_input("Enter your search query")
    top_k = st.number_input("Number of Results", min_value=1, max_value=10, value=5)

    if st.button("Search"):
        if not query.strip():
            # An empty query has nothing to match; skip the backend call.
            st.warning("Please enter a search query.")
        else:
            try:
                results = search_documents(query, top_k)
                st.write("**Results:**")
                for idx, res in enumerate(results["results"], start=1):
                    st.markdown(f"**Result {idx}:**")
                    st.write(res)
            except Exception as e:
                st.error(f"Error: {str(e)}")