Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,6 +16,8 @@ st.set_page_config(
|
|
| 16 |
config.set_option("server.enableCORS", False)
|
| 17 |
config.set_option("server.enableXsrfProtection", False)
|
| 18 |
|
|
|
|
|
|
|
| 19 |
# ─── CSS ──────────────────────────────────────────────────────────────────────
|
| 20 |
st.markdown("""
|
| 21 |
<style>
|
|
@@ -63,6 +65,10 @@ html, body, [class*="css"] { font-family: 'DM Sans', sans-serif; }
|
|
| 63 |
.ft-excel { background: #064e3b; color: #6ee7b7; }
|
| 64 |
.ft-docx { background: #1e3a5f; color: #7dd3fc; }
|
| 65 |
.ft-text { background: #1c1917; color: #d6d3d1; }
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
[data-testid="stFileUploader"] { background: #1c1c26 !important; border: 2px dashed #2a2a3a !important; border-radius: 12px !important; }
|
| 67 |
.stButton > button {
|
| 68 |
background: linear-gradient(135deg, #7c3aed, #4f46e5) !important;
|
|
@@ -76,6 +82,7 @@ html, body, [class*="css"] { font-family: 'DM Sans', sans-serif; }
|
|
| 76 |
}
|
| 77 |
.badge-ready { background:#14532d; color:#86efac; padding:3px 10px; border-radius:20px; font-size:0.75rem; }
|
| 78 |
.badge-empty { background:#1c1917; color:#a8a29e; padding:3px 10px; border-radius:20px; font-size:0.75rem; }
|
|
|
|
| 79 |
hr { border-color: #2a2a3a !important; }
|
| 80 |
::-webkit-scrollbar { width: 6px; }
|
| 81 |
::-webkit-scrollbar-track { background: #0f0f13; }
|
|
@@ -91,17 +98,14 @@ def load_rag_engine():
|
|
| 91 |
|
| 92 |
# ─── Session state ────────────────────────────────────────────────────────────
|
| 93 |
defaults = {
|
| 94 |
-
"messages":
|
| 95 |
-
"
|
| 96 |
-
"doc_name": "",
|
| 97 |
-
"doc_type": "",
|
| 98 |
-
"chunk_count": 0,
|
| 99 |
-
"processed_hash": "",
|
| 100 |
}
|
| 101 |
for k, v in defaults.items():
|
| 102 |
if k not in st.session_state:
|
| 103 |
st.session_state[k] = v
|
| 104 |
|
|
|
|
| 105 |
def file_type_badge(suffix: str) -> str:
|
| 106 |
m = {
|
| 107 |
".pdf": ("pdf", "PDF"),
|
|
@@ -119,42 +123,106 @@ def file_type_badge(suffix: str) -> str:
|
|
| 119 |
cls, label = m.get(suffix, ("text", suffix.upper()))
|
| 120 |
return f'<span class="filetype-badge ft-{cls}">{label}</span>'
|
| 121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
# ─── Sidebar ──────────────────────────────────────────────────────────────────
|
| 123 |
with st.sidebar:
|
| 124 |
st.markdown('<p style="font-family:Syne,sans-serif;font-size:1.3rem;font-weight:700;color:#a78bfa;">🧠 DocMind AI</p>', unsafe_allow_html=True)
|
| 125 |
-
st.markdown('<p style="color:#6b6b8a;font-size:0.8rem;">Multimodal RAG ·
|
| 126 |
st.markdown("---")
|
| 127 |
|
| 128 |
-
#
|
| 129 |
-
if
|
| 130 |
-
rag = load_rag_engine()
|
| 131 |
mem_count = rag.get_memory_count()
|
| 132 |
-
st.markdown(f'<span class="badge-ready">✓ Ready</span>', unsafe_allow_html=True)
|
| 133 |
-
suffix = Path(st.session_state.doc_name).suffix.lower()
|
| 134 |
st.markdown(
|
| 135 |
-
f'<
|
| 136 |
-
f'
|
| 137 |
-
unsafe_allow_html=True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
)
|
| 139 |
-
st.markdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
else:
|
| 141 |
-
st.markdown('<span class="badge-empty">○ No
|
| 142 |
|
| 143 |
st.markdown("---")
|
| 144 |
-
st.markdown('<p style="color:#6b6b8a;font-size:0.78rem;font-weight:600;text-transform:uppercase;letter-spacing:0.08em;">Upload Document</p>', unsafe_allow_html=True)
|
| 145 |
-
st.markdown('<p style="color:#6b6b8a;font-size:0.72rem;">PDF · TXT · DOCX · CSV · XLSX · JPG · PNG</p>', unsafe_allow_html=True)
|
| 146 |
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
|
|
|
| 151 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
|
| 153 |
if uploaded_file:
|
| 154 |
file_hash = hashlib.md5(uploaded_file.read()).hexdigest()
|
| 155 |
uploaded_file.seek(0)
|
| 156 |
|
| 157 |
-
if
|
|
|
|
|
|
|
|
|
|
| 158 |
suffix = Path(uploaded_file.name).suffix.lower()
|
| 159 |
type_msg = {
|
| 160 |
".pdf": "Reading PDF...",
|
|
@@ -163,59 +231,60 @@ with st.sidebar:
|
|
| 163 |
".csv": "Parsing CSV...",
|
| 164 |
".xlsx": "Parsing Excel...",
|
| 165 |
".xls": "Parsing Excel...",
|
| 166 |
-
".jpg": "🖼️
|
| 167 |
-
".jpeg": "🖼️
|
| 168 |
-
".png": "🖼️
|
| 169 |
-
".webp": "🖼️
|
| 170 |
}.get(suffix, "Processing...")
|
| 171 |
|
| 172 |
with st.spinner(type_msg):
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
st.rerun()
|
| 183 |
|
| 184 |
st.markdown("---")
|
| 185 |
|
| 186 |
-
# Sample doc
|
| 187 |
-
st.markdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
if st.button("📥 Load Sample: AI Report", use_container_width=True):
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
|
| 203 |
st.markdown("---")
|
| 204 |
|
|
|
|
| 205 |
col_a, col_b = st.columns(2)
|
| 206 |
with col_a:
|
| 207 |
if st.button("🗑️ Clear Chat", use_container_width=True):
|
| 208 |
st.session_state.messages = []
|
| 209 |
-
|
| 210 |
st.rerun()
|
| 211 |
with col_b:
|
| 212 |
-
if st.button("🔄
|
| 213 |
-
|
| 214 |
st.session_state.messages = []
|
| 215 |
-
|
| 216 |
-
global_rag.clear_memory()
|
| 217 |
-
global_rag._vectorstore = None
|
| 218 |
-
global_rag._doc_name = ""
|
| 219 |
st.rerun()
|
| 220 |
|
| 221 |
st.markdown("---")
|
|
@@ -225,51 +294,85 @@ with st.sidebar:
|
|
| 225 |
🔗 LangChain · ChromaDB<br>
|
| 226 |
🤗 MiniLM Embeddings<br>
|
| 227 |
🦙 Llama-3 / Mistral-7B<br>
|
| 228 |
-
🖼️ BLIP
|
| 229 |
💬 Conversation Memory<br>
|
|
|
|
| 230 |
🌊 Streamlit + FastAPI
|
| 231 |
</p>
|
| 232 |
""", unsafe_allow_html=True)
|
| 233 |
|
| 234 |
-
|
|
|
|
| 235 |
st.markdown('<h1 class="hero-title">DocMind AI</h1>', unsafe_allow_html=True)
|
| 236 |
-
st.markdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
|
| 238 |
-
# Stats
|
| 239 |
-
rag = load_rag_engine()
|
| 240 |
c1, c2, c3, c4 = st.columns(4)
|
| 241 |
with c1:
|
| 242 |
-
st.markdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
with c2:
|
| 244 |
-
st.markdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
with c3:
|
| 246 |
-
st.markdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
with c4:
|
| 248 |
-
|
| 249 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
|
| 251 |
st.markdown("<br>", unsafe_allow_html=True)
|
| 252 |
|
| 253 |
# ─── Chat history ─────────────────────────────────────────────────────────────
|
| 254 |
if not st.session_state.messages:
|
| 255 |
-
if
|
| 256 |
-
|
| 257 |
-
|
|
|
|
| 258 |
st.markdown(f"""
|
| 259 |
<div style="text-align:center;padding:3rem;color:#6b6b8a;">
|
| 260 |
-
<div style="font-size:2.5rem;margin-bottom:1rem;">{
|
| 261 |
-
<p style="font-size:1rem;color:#a78bfa;">
|
| 262 |
-
|
| 263 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 264 |
</div>""", unsafe_allow_html=True)
|
| 265 |
else:
|
| 266 |
st.markdown("""
|
| 267 |
<div style="text-align:center;padding:4rem 2rem;color:#6b6b8a;">
|
| 268 |
<div style="font-size:3rem;margin-bottom:1rem;">🧠</div>
|
| 269 |
-
<p style="font-size:1.1rem;color:#a78bfa;font-family:'Syne',sans-serif;font-weight:600;">
|
|
|
|
|
|
|
| 270 |
<p style="font-size:0.85rem;margin-top:0.5rem;">
|
| 271 |
📄 PDF · 📝 Word · 📊 CSV/Excel · 🖼️ Images<br><br>
|
| 272 |
-
Upload in the sidebar or load the sample AI report to get started.
|
|
|
|
| 273 |
</p>
|
| 274 |
</div>""", unsafe_allow_html=True)
|
| 275 |
else:
|
|
@@ -294,29 +397,38 @@ else:
|
|
| 294 |
{sources_html}
|
| 295 |
</div>""", unsafe_allow_html=True)
|
| 296 |
|
| 297 |
-
# ─── Input ───────────────────────────────────────────────────────────────
|
| 298 |
st.markdown("<br>", unsafe_allow_html=True)
|
| 299 |
|
| 300 |
-
if not
|
| 301 |
st.chat_input("Upload a document first...", disabled=True)
|
| 302 |
else:
|
| 303 |
-
placeholder
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
|
| 316 |
if prompt := st.chat_input(placeholder):
|
| 317 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 318 |
with st.spinner("🔍 Retrieving & generating..."):
|
| 319 |
-
rag = load_rag_engine()
|
| 320 |
answer, sources = rag.query(prompt)
|
| 321 |
mem_count = rag.get_memory_count()
|
| 322 |
st.session_state.messages.append({
|
|
@@ -325,4 +437,4 @@ else:
|
|
| 325 |
"sources": sources,
|
| 326 |
"memory_count": mem_count,
|
| 327 |
})
|
| 328 |
-
st.rerun()
|
|
|
|
| 16 |
config.set_option("server.enableCORS", False)
|
| 17 |
config.set_option("server.enableXsrfProtection", False)
|
| 18 |
|
| 19 |
+
MAX_FILES = 5
|
| 20 |
+
|
| 21 |
# ─── CSS ──────────────────────────────────────────────────────────────────────
|
| 22 |
st.markdown("""
|
| 23 |
<style>
|
|
|
|
| 65 |
.ft-excel { background: #064e3b; color: #6ee7b7; }
|
| 66 |
.ft-docx { background: #1e3a5f; color: #7dd3fc; }
|
| 67 |
.ft-text { background: #1c1917; color: #d6d3d1; }
|
| 68 |
+
.doc-item {
|
| 69 |
+
background: #1c1c26; border: 1px solid #2a2a3a; border-radius: 10px;
|
| 70 |
+
padding: 0.6rem 0.8rem; margin-bottom: 0.4rem;
|
| 71 |
+
}
|
| 72 |
[data-testid="stFileUploader"] { background: #1c1c26 !important; border: 2px dashed #2a2a3a !important; border-radius: 12px !important; }
|
| 73 |
.stButton > button {
|
| 74 |
background: linear-gradient(135deg, #7c3aed, #4f46e5) !important;
|
|
|
|
| 82 |
}
|
| 83 |
.badge-ready { background:#14532d; color:#86efac; padding:3px 10px; border-radius:20px; font-size:0.75rem; }
|
| 84 |
.badge-empty { background:#1c1917; color:#a8a29e; padding:3px 10px; border-radius:20px; font-size:0.75rem; }
|
| 85 |
+
.badge-count { background:#312e81; color:#a5b4fc; padding:3px 10px; border-radius:20px; font-size:0.75rem; }
|
| 86 |
hr { border-color: #2a2a3a !important; }
|
| 87 |
::-webkit-scrollbar { width: 6px; }
|
| 88 |
::-webkit-scrollbar-track { background: #0f0f13; }
|
|
|
|
| 98 |
|
| 99 |
# ─── Session state ────────────────────────────────────────────────────────────
|
| 100 |
defaults = {
|
| 101 |
+
"messages": [],
|
| 102 |
+
"processed_files": {}, # {filename: md5_hash}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
}
|
| 104 |
for k, v in defaults.items():
|
| 105 |
if k not in st.session_state:
|
| 106 |
st.session_state[k] = v
|
| 107 |
|
| 108 |
+
|
| 109 |
def file_type_badge(suffix: str) -> str:
|
| 110 |
m = {
|
| 111 |
".pdf": ("pdf", "PDF"),
|
|
|
|
| 123 |
cls, label = m.get(suffix, ("text", suffix.upper()))
|
| 124 |
return f'<span class="filetype-badge ft-{cls}">{label}</span>'
|
| 125 |
|
| 126 |
+
|
| 127 |
+
# Emoji shown next to a document, keyed by lower-case file extension
# (including the leading dot). Built once at import time instead of on
# every call.
_TYPE_EMOJI = {
    ".pdf": "📄", ".txt": "📄",
    ".docx": "📝", ".doc": "📝",
    ".csv": "📊", ".xlsx": "📊", ".xls": "📊",
    ".jpg": "🖼️", ".jpeg": "🖼️", ".png": "🖼️", ".webp": "🖼️",
}
_DEFAULT_TYPE_EMOJI = "📄"


def type_emoji(suffix: str) -> str:
    """Return the emoji that represents a file with the given extension.

    Args:
        suffix: File extension including the leading dot, e.g. ``".pdf"``.
            Matching is case-insensitive, so ``".PNG"`` and ``".png"`` give
            the same result.

    Returns:
        The emoji for the extension, or the generic document emoji when the
        extension is not recognised.
    """
    # Normalise case so extensions such as ".PNG" do not fall through to the
    # generic default.
    return _TYPE_EMOJI.get(suffix.lower(), _DEFAULT_TYPE_EMOJI)
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
# ─── Load RAG engine & get document state ─────────────────────────────────────
|
| 138 |
+
rag = load_rag_engine()
|
| 139 |
+
documents = rag.get_documents() # [{name, type, chunk_count}]
|
| 140 |
+
doc_loaded = len(documents) > 0
|
| 141 |
+
total_chunks = rag.get_total_chunks()
|
| 142 |
+
file_count = rag.get_file_count()
|
| 143 |
+
|
| 144 |
+
|
| 145 |
# ─── Sidebar ──────────────────────────────────────────────────────────────────
|
| 146 |
with st.sidebar:
|
| 147 |
st.markdown('<p style="font-family:Syne,sans-serif;font-size:1.3rem;font-weight:700;color:#a78bfa;">🧠 DocMind AI</p>', unsafe_allow_html=True)
|
| 148 |
+
st.markdown('<p style="color:#6b6b8a;font-size:0.8rem;">Multimodal RAG · Multi-File · Memory</p>', unsafe_allow_html=True)
|
| 149 |
st.markdown("---")
|
| 150 |
|
| 151 |
+
# ── Document List ─────────────────────────────────────────────────────────
|
| 152 |
+
if documents:
|
|
|
|
| 153 |
mem_count = rag.get_memory_count()
|
|
|
|
|
|
|
| 154 |
st.markdown(
|
| 155 |
+
f'<span class="badge-ready">✓ Ready</span> '
|
| 156 |
+
f'<span class="badge-count">{file_count}/{MAX_FILES} files</span>',
|
| 157 |
+
unsafe_allow_html=True,
|
| 158 |
+
)
|
| 159 |
+
st.markdown(
|
| 160 |
+
f'<p style="color:#6b6b8a;font-size:0.78rem;margin-top:0.3rem;">'
|
| 161 |
+
f'{total_chunks} total chunks · {mem_count} exchanges in memory</p>',
|
| 162 |
+
unsafe_allow_html=True,
|
| 163 |
)
|
| 164 |
+
st.markdown("")
|
| 165 |
+
|
| 166 |
+
# Show each document with a remove button
|
| 167 |
+
# Local import, in the same style as the data_downloader import used by the
# sample-document button in this script.
from html import escape as html_escape

# Render one row per indexed document: a styled card on the left and a
# remove button on the right.
for doc in documents:
    doc_name = doc["name"]
    # The file name originates from a user upload and is rendered with
    # unsafe_allow_html=True, so it must be HTML-escaped to prevent markup
    # or script injection through a crafted file name.
    safe_name = html_escape(doc_name)

    col_doc, col_rm = st.columns([5, 1])
    with col_doc:
        badge = file_type_badge(doc["type"])
        emoji = type_emoji(doc["type"])
        st.markdown(
            f'<div class="doc-item">'
            f'{badge} <b style="color:#e8e8f0;font-size:0.82rem;">{safe_name}</b>'
            f'<br><span style="color:#6b6b8a;font-size:0.72rem;">'
            f'{emoji} {doc["chunk_count"]} chunks</span>'
            f'</div>',
            unsafe_allow_html=True,
        )
    with col_rm:
        # Spacer so the button lines up vertically with the document card.
        st.markdown('<div style="padding-top:0.6rem;"></div>', unsafe_allow_html=True)
        if st.button("❌", key=f"rm_{doc_name}", help=f"Remove {doc_name}"):
            rag.remove_file(doc_name)
            # Forget the stored hash so the same file can be uploaded again.
            st.session_state.processed_files.pop(doc_name, None)
            st.rerun()
|
| 190 |
else:
|
| 191 |
+
st.markdown('<span class="badge-empty">○ No documents loaded</span>', unsafe_allow_html=True)
|
| 192 |
|
| 193 |
st.markdown("---")
|
|
|
|
|
|
|
| 194 |
|
| 195 |
+
# ── Upload Area ───────────────────────────────────────────────────────────
|
| 196 |
+
st.markdown(
|
| 197 |
+
'<p style="color:#6b6b8a;font-size:0.78rem;font-weight:600;text-transform:uppercase;letter-spacing:0.08em;">'
|
| 198 |
+
'Upload Document</p>',
|
| 199 |
+
unsafe_allow_html=True,
|
| 200 |
)
|
| 201 |
+
st.markdown(
|
| 202 |
+
'<p style="color:#6b6b8a;font-size:0.72rem;">'
|
| 203 |
+
'PDF · TXT · DOCX · CSV · XLSX · JPG · PNG</p>',
|
| 204 |
+
unsafe_allow_html=True,
|
| 205 |
+
)
|
| 206 |
+
|
| 207 |
+
if file_count >= MAX_FILES:
|
| 208 |
+
st.warning(f"Maximum {MAX_FILES} files reached. Remove a file to upload more.")
|
| 209 |
+
uploaded_file = None
|
| 210 |
+
else:
|
| 211 |
+
uploaded_file = st.file_uploader(
|
| 212 |
+
"Upload",
|
| 213 |
+
type=["pdf", "txt", "docx", "doc", "csv", "xlsx", "xls",
|
| 214 |
+
"jpg", "jpeg", "png", "webp"],
|
| 215 |
+
label_visibility="collapsed",
|
| 216 |
+
)
|
| 217 |
|
| 218 |
if uploaded_file:
|
| 219 |
file_hash = hashlib.md5(uploaded_file.read()).hexdigest()
|
| 220 |
uploaded_file.seek(0)
|
| 221 |
|
| 222 |
+
# Check if this exact file (by hash) was already processed
|
| 223 |
+
already_processed = file_hash in st.session_state.processed_files.values()
|
| 224 |
+
|
| 225 |
+
if not already_processed:
|
| 226 |
suffix = Path(uploaded_file.name).suffix.lower()
|
| 227 |
type_msg = {
|
| 228 |
".pdf": "Reading PDF...",
|
|
|
|
| 231 |
".csv": "Parsing CSV...",
|
| 232 |
".xlsx": "Parsing Excel...",
|
| 233 |
".xls": "Parsing Excel...",
|
| 234 |
+
".jpg": "🖼️ Processing image (OCR + captioning)...",
|
| 235 |
+
".jpeg": "🖼️ Processing image (OCR + captioning)...",
|
| 236 |
+
".png": "🖼️ Processing image (OCR + captioning)...",
|
| 237 |
+
".webp": "🖼️ Processing image (OCR + captioning)...",
|
| 238 |
}.get(suffix, "Processing...")
|
| 239 |
|
| 240 |
with st.spinner(type_msg):
|
| 241 |
+
try:
|
| 242 |
+
chunks = rag.ingest_file(uploaded_file)
|
| 243 |
+
st.session_state.processed_files[uploaded_file.name] = file_hash
|
| 244 |
+
st.success(f"✓ Indexed {chunks} chunks from {uploaded_file.name}!")
|
| 245 |
+
st.rerun()
|
| 246 |
+
except ValueError as e:
|
| 247 |
+
st.error(str(e))
|
| 248 |
+
except Exception as e:
|
| 249 |
+
st.error(f"Failed to process file: {e}")
|
|
|
|
| 250 |
|
| 251 |
st.markdown("---")
|
| 252 |
|
| 253 |
+
# ── Sample doc ────────────────────────────────────────────────────────────
|
| 254 |
+
st.markdown(
|
| 255 |
+
'<p style="color:#6b6b8a;font-size:0.78rem;font-weight:600;text-transform:uppercase;letter-spacing:0.08em;">'
|
| 256 |
+
'Or try a sample</p>',
|
| 257 |
+
unsafe_allow_html=True,
|
| 258 |
+
)
|
| 259 |
if st.button("📥 Load Sample: AI Report", use_container_width=True):
|
| 260 |
+
if file_count >= MAX_FILES:
|
| 261 |
+
st.error(f"Maximum {MAX_FILES} files reached. Remove a file first.")
|
| 262 |
+
else:
|
| 263 |
+
with st.spinner("Downloading sample..."):
|
| 264 |
+
from data_downloader import download_sample_doc
|
| 265 |
+
path, name = download_sample_doc()
|
| 266 |
+
try:
|
| 267 |
+
chunks = rag.ingest_path(path, name)
|
| 268 |
+
st.session_state.processed_files[name] = "sample"
|
| 269 |
+
st.success(f"✓ {chunks} chunks loaded!")
|
| 270 |
+
st.rerun()
|
| 271 |
+
except ValueError as e:
|
| 272 |
+
st.error(str(e))
|
| 273 |
|
| 274 |
st.markdown("---")
|
| 275 |
|
| 276 |
+
# ── Action buttons ────────────────────────────────────────────────────────
|
| 277 |
col_a, col_b = st.columns(2)
|
| 278 |
with col_a:
|
| 279 |
if st.button("🗑️ Clear Chat", use_container_width=True):
|
| 280 |
st.session_state.messages = []
|
| 281 |
+
rag.clear_memory()
|
| 282 |
st.rerun()
|
| 283 |
with col_b:
|
| 284 |
+
if st.button("🔄 Reset All", use_container_width=True):
|
| 285 |
+
rag.reset()
|
| 286 |
st.session_state.messages = []
|
| 287 |
+
st.session_state.processed_files = {}
|
|
|
|
|
|
|
|
|
|
| 288 |
st.rerun()
|
| 289 |
|
| 290 |
st.markdown("---")
|
|
|
|
| 294 |
🔗 LangChain · ChromaDB<br>
|
| 295 |
🤗 MiniLM Embeddings<br>
|
| 296 |
🦙 Llama-3 / Mistral-7B<br>
|
| 297 |
+
🖼️ BLIP + VLM Captioning<br>
|
| 298 |
💬 Conversation Memory<br>
|
| 299 |
+
📁 Up to 5 files simultaneously<br>
|
| 300 |
🌊 Streamlit + FastAPI
|
| 301 |
</p>
|
| 302 |
""", unsafe_allow_html=True)
|
| 303 |
|
| 304 |
+
|
| 305 |
+
# ─── Main Area ────────────────────────────────────────────────────────────────
|
| 306 |
st.markdown('<h1 class="hero-title">DocMind AI</h1>', unsafe_allow_html=True)
|
| 307 |
+
st.markdown(
|
| 308 |
+
'<p class="hero-sub">'
|
| 309 |
+
'PDF · Word · CSV · Excel · Images — Upload up to 5 files. Ask anything. Remembers your conversation.'
|
| 310 |
+
'</p>',
|
| 311 |
+
unsafe_allow_html=True,
|
| 312 |
+
)
|
| 313 |
|
| 314 |
+
# ── Stats ─────────────────────────────────────────────────────────────────────
|
|
|
|
| 315 |
c1, c2, c3, c4 = st.columns(4)
|
| 316 |
with c1:
|
| 317 |
+
st.markdown(
|
| 318 |
+
f'<div class="stat-card">'
|
| 319 |
+
f'<div class="stat-number">{total_chunks or "—"}</div>'
|
| 320 |
+
f'<div class="stat-label">Chunks Indexed</div></div>',
|
| 321 |
+
unsafe_allow_html=True,
|
| 322 |
+
)
|
| 323 |
with c2:
|
| 324 |
+
st.markdown(
|
| 325 |
+
f'<div class="stat-card">'
|
| 326 |
+
f'<div class="stat-number">{file_count}/{MAX_FILES}</div>'
|
| 327 |
+
f'<div class="stat-label">Files Loaded</div></div>',
|
| 328 |
+
unsafe_allow_html=True,
|
| 329 |
+
)
|
| 330 |
with c3:
|
| 331 |
+
st.markdown(
|
| 332 |
+
f'<div class="stat-card">'
|
| 333 |
+
f'<div class="stat-number">{len(st.session_state.messages) // 2}</div>'
|
| 334 |
+
f'<div class="stat-label">Questions Asked</div></div>',
|
| 335 |
+
unsafe_allow_html=True,
|
| 336 |
+
)
|
| 337 |
with c4:
|
| 338 |
+
st.markdown(
|
| 339 |
+
f'<div class="stat-card">'
|
| 340 |
+
f'<div class="stat-number">{rag.get_memory_count()}</div>'
|
| 341 |
+
f'<div class="stat-label">Memory Window</div></div>',
|
| 342 |
+
unsafe_allow_html=True,
|
| 343 |
+
)
|
| 344 |
|
| 345 |
st.markdown("<br>", unsafe_allow_html=True)
|
| 346 |
|
| 347 |
# ─── Chat history ─────────────────────────────────────────────────────────────
|
| 348 |
if not st.session_state.messages:
|
| 349 |
+
if doc_loaded:
|
| 350 |
+
# Show loaded files summary
|
| 351 |
+
# Local import, in the same style as the data_downloader import used by the
# sample-document button in this script.
from html import escape as html_escape

# File names come from user uploads and are rendered below with
# unsafe_allow_html=True, so each one is HTML-escaped before interpolation.
file_names = ", ".join(
    f"<b style='color:#e8e8f0;'>{html_escape(d['name'])}</b>" for d in documents
)
# dict.fromkeys de-duplicates while keeping first-seen order, so the emoji
# row follows the document order instead of arbitrary set order.
emojis = " ".join(dict.fromkeys(type_emoji(d["type"]) for d in documents))
plural = "s" if file_count > 1 else ""
# Only invite further uploads while there is room; the limit is taken from
# MAX_FILES so the text cannot drift from the enforced cap.
upload_hint = (
    f"You can also upload more files (up to {MAX_FILES})."
    if file_count < MAX_FILES
    else ""
)
st.markdown(f"""
<div style="text-align:center;padding:3rem;color:#6b6b8a;">
<div style="font-size:2.5rem;margin-bottom:1rem;">{emojis}</div>
<p style="font-size:1rem;color:#a78bfa;">
{file_count} document{plural} ready!
</p>
<p style="font-size:0.85rem;">Ask anything about {file_names}</p>
<p style="font-size:0.78rem;margin-top:0.5rem;">
I'll remember your conversation — ask follow-up questions naturally.
{upload_hint}
</p>
</div>""", unsafe_allow_html=True)
|
| 365 |
else:
|
| 366 |
st.markdown("""
|
| 367 |
<div style="text-align:center;padding:4rem 2rem;color:#6b6b8a;">
|
| 368 |
<div style="font-size:3rem;margin-bottom:1rem;">🧠</div>
|
| 369 |
+
<p style="font-size:1.1rem;color:#a78bfa;font-family:'Syne',sans-serif;font-weight:600;">
|
| 370 |
+
Multimodal RAG — Upload up to 5 files
|
| 371 |
+
</p>
|
| 372 |
<p style="font-size:0.85rem;margin-top:0.5rem;">
|
| 373 |
📄 PDF · 📝 Word · 📊 CSV/Excel · 🖼️ Images<br><br>
|
| 374 |
+
Upload in the sidebar or load the sample AI report to get started.<br>
|
| 375 |
+
You can upload multiple files and ask questions across all of them.
|
| 376 |
</p>
|
| 377 |
</div>""", unsafe_allow_html=True)
|
| 378 |
else:
|
|
|
|
| 397 |
{sources_html}
|
| 398 |
</div>""", unsafe_allow_html=True)
|
| 399 |
|
| 400 |
+
# ─── Chat Input ───────────────────────────────────────────────────────────────
|
| 401 |
st.markdown("<br>", unsafe_allow_html=True)
|
| 402 |
|
| 403 |
+
if not doc_loaded:
|
| 404 |
st.chat_input("Upload a document first...", disabled=True)
|
| 405 |
else:
|
| 406 |
+
# Choose the chat-input placeholder. With exactly one document loaded the hint
# is tailored to that document's extension; otherwise it mentions how many
# documents are loaded. (The previously assigned loaded_types / image_exts /
# table_exts values were never read and have been removed.)
if file_count == 1:
    doc_type = documents[0]["type"]
    placeholder = {
        ".pdf": "Ask anything about this PDF...",
        ".txt": "Ask anything about this text...",
        ".docx": "Ask anything about this document...",
        ".doc": "Ask anything about this document...",
        ".csv": "Ask about the data, columns, or statistics...",
        ".xlsx": "Ask about the spreadsheet data...",
        ".xls": "Ask about the spreadsheet data...",
        ".jpg": "Ask me what I see in this image...",
        ".jpeg": "Ask me what I see in this image...",
        ".png": "Ask me what I see in this image...",
        ".webp": "Ask me what I see in this image...",
    }.get(doc_type, "Ask anything about your document...")
else:
    placeholder = f"Ask anything about your {file_count} documents..."
|
| 428 |
|
| 429 |
if prompt := st.chat_input(placeholder):
|
| 430 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 431 |
with st.spinner("🔍 Retrieving & generating..."):
|
|
|
|
| 432 |
answer, sources = rag.query(prompt)
|
| 433 |
mem_count = rag.get_memory_count()
|
| 434 |
st.session_state.messages.append({
|
|
|
|
| 437 |
"sources": sources,
|
| 438 |
"memory_count": mem_count,
|
| 439 |
})
|
| 440 |
+
st.rerun()
|