File size: 7,901 Bytes
bd91918
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import streamlit as st
import requests

# ── FastAPI backend URL ────────────────────────────────────────────────────────
# Change this to your HuggingFace Space URL when deployed
# e.g. "https://your-username-your-space-name.hf.space"
API_BASE_URL = "https://happy4040-contextiq.hf.space"

st.set_page_config(page_title="ContextIQ Document Q&A", layout="wide")

st.markdown("""
<style>
.main-header {
    font-size: 2.5rem;
    font-weight: bold;
    text-align: center;
    color: #1f77b4;
}
.stButton>button {
    width: 100%;
}
</style>
""", unsafe_allow_html=True)

# ── Session state ──────────────────────────────────────────────────────────────
if "doc_id" not in st.session_state:
    st.session_state.doc_id = None
if "history" not in st.session_state:
    st.session_state.history = []
if "document_processed" not in st.session_state:
    st.session_state.document_processed = False
if "doc_info" not in st.session_state:
    st.session_state.doc_info = None


def main():

    st.markdown('<div class="main-header">ContextIQ Secure Document Q&A</div>', unsafe_allow_html=True)

    # ── Sidebar ────────────────────────────────────────────────────────────────
    with st.sidebar:
        st.header("Upload Document")

        uploaded_file = st.file_uploader(
            "Choose a document",
            type=["pdf", "docx", "pptx", "xlsx", "txt"]
        )

        st.markdown("---")
        st.subheader("Retrieval Settings")

        k_value = st.slider(
            "Number of document chunks",
            min_value=1,
            max_value=10,
            value=3,
            help="Number of relevant chunks to retrieve for answering questions"
        )

        if st.button("Clear Chat History"):
            st.session_state.history = []
            st.rerun()

        if st.button("Reset Document"):
            st.session_state.doc_id = None
            st.session_state.doc_info = None
            st.session_state.history = []
            st.session_state.document_processed = False
            st.rerun()

        st.markdown("---")

        # ── API health check ───────────────────────────────────────────────────
        try:
            resp = requests.get(f"{API_BASE_URL}/health", timeout=60)
            if resp.status_code == 200:
                st.success("API: Online ")
            else:
                st.error("API: Unhealthy ")
        except Exception:
            st.error("API: Unreachable ")

    # ── Main Area ──────────────────────────────────────────────────────────────
    if uploaded_file:

        # Upload + process document only once per file
        if not st.session_state.document_processed:
            with st.spinner("Uploading and processing document..."):
                try:
                    response = requests.post(
                        f"{API_BASE_URL}/v1/documents",
                        files={"file": (uploaded_file.name, uploaded_file.getvalue(), uploaded_file.type)},
                        timeout=120,
                    )

                    if response.status_code == 200:
                        data = response.json()
                        st.session_state.doc_id = data["doc_id"]
                        st.session_state.doc_info = data
                        st.session_state.document_processed = True
                        st.success(f"Document indexed! ({data['num_chunks']} chunks)")
                    else:
                        st.error(f"Upload failed: {response.json().get('detail', 'Unknown error')}")

                except requests.exceptions.Timeout:
                    st.error("Request timed out. The document may be too large.")
                except Exception as e:
                    st.error(f"Error connecting to API: {e}")

        # ── Doc info ───────────────────────────────────────────────────────────
        if st.session_state.doc_info:
            info = st.session_state.doc_info
            st.info(f"**{info['filename']}** | {info['num_chunks']} chunks | ID: `{info['doc_id']}`")

        st.markdown("---")
        st.subheader("Ask a Question")

        question = st.text_input(
            "Enter your question",
            placeholder="What is this document about?",
            key="question_input"
        )

        if st.button("πŸ” Ask", type="primary") and question:
            if not question.strip():
                st.warning("Please enter a valid question.")
            elif not st.session_state.doc_id:
                st.warning("No document loaded. Please upload a document first.")
            else:
                with st.spinner("Generating answer..."):
                    try:
                        response = requests.post(
                            f"{API_BASE_URL}/v1/query",
                            json={
                                "doc_id": st.session_state.doc_id,
                                "question": question,
                                "k": k_value,
                            },
                            timeout=300,
                        )

                        if response.status_code == 200:
                            result = response.json()
                            st.session_state.history.append({
                                "question": question,
                                "answer": result["answer"],
                                "sources": result["sources"],
                            })
                            st.success("Answer generated!")

                        elif response.status_code == 404:
                            st.error("Document not found. Please re-upload your document.")
                            st.session_state.document_processed = False
                            st.session_state.doc_id = None

                        else:
                            st.error(f"Query failed: {response.json().get('detail', 'Unknown error')}")

                    except requests.exceptions.Timeout:
                        st.error("Query timed out. The model is taking too long on CPU.")
                    except Exception as e:
                        st.error(f"Error connecting to API: {e}")

        # ── Conversation history ───────────────────────────────────────────────
        if st.session_state.history:
            st.markdown("---")
            st.subheader("Conversation History")

            for i, item in enumerate(reversed(st.session_state.history)):
                with st.expander(f"Q{len(st.session_state.history)-i}: {item['question']}", expanded=(i == 0)):
                    st.markdown("**Answer:**")
                    st.write(item["answer"])

                    st.markdown("---")
                    st.markdown("**Retrieved Context:**")
                    for idx, src in enumerate(item["sources"], 1):
                        with st.expander(f"Chunk {idx}"):
                            st.text(src[:500] + "..." if len(src) > 500 else src)

    else:
        st.info("Upload a document from the sidebar to get started.")


if __name__ == "__main__":
    main()