Spaces:
Running
Running
Merge pull request #133 from Jiya3177/feat/export-chat-history-125
Browse files
- backend/app/routes/chat.py +97 -11
- backend/requirements.txt +1 -0
- frontend/src/components/chat/ChatPanel.tsx +10 -2
backend/app/routes/chat.py
CHANGED
|
@@ -1,12 +1,19 @@
|
|
| 1 |
"""
|
| 2 |
Chat routes — ask questions with RAG, stream responses via SSE, manage history.
|
| 3 |
"""
|
|
|
|
| 4 |
import json
|
|
|
|
|
|
|
| 5 |
import logging
|
| 6 |
from typing import Optional
|
| 7 |
|
| 8 |
from fastapi import APIRouter, Depends, HTTPException
|
| 9 |
-
from fastapi.responses import StreamingResponse
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
from sqlalchemy.orm import Session
|
| 11 |
|
| 12 |
from app.database import get_db
|
|
@@ -253,32 +260,31 @@ def export_chat_history(
|
|
| 253 |
):
|
| 254 |
"""Export the chat history for a document as a downloadable file.
|
| 255 |
|
| 256 |
-
Supports Markdown (.md)
|
| 257 |
authentication via either the standard `Authorization: Bearer <token>`
|
| 258 |
header (handled by the dependency chain) or a `token` query parameter to
|
| 259 |
facilitate browser-initiated downloads that cannot set custom headers.
|
| 260 |
|
| 261 |
Args:
|
| 262 |
document_id: The unique identifier of the document whose chat history is to be exported.
|
| 263 |
-
format: Output format, either "md" (Markdown)
|
| 264 |
token: Optional JWT token passed as a query parameter. Used for browser
|
| 265 |
downloads when the `Authorization` header is not available.
|
| 266 |
db: SQLAlchemy database session, obtained from the dependency.
|
| 267 |
|
| 268 |
Returns:
|
| 269 |
Response: A FastAPI `Response` object with:
|
| 270 |
-
- `content`: Formatted chat history as a string.
|
| 271 |
-
- `media_type`: `text/markdown`
|
| 272 |
- `headers`: `Content-Disposition` attachment header with a generated filename.
|
| 273 |
|
| 274 |
Raises:
|
| 275 |
HTTPException: 401 if neither the token query parameter nor a valid
|
| 276 |
bearer token provides an authenticated user.
|
| 277 |
-
HTTPException: 400 if the `format` parameter is not "md" or "
|
| 278 |
HTTPException: 404 if the document does not exist or does not belong
|
| 279 |
to the user, or if no chat messages are found for the document.
|
| 280 |
"""
|
| 281 |
-
from fastapi import Request
|
| 282 |
from app.auth import decode_token as _decode
|
| 283 |
|
| 284 |
# Resolve user from query-param token (browser download links can't set headers)
|
|
@@ -291,8 +297,8 @@ def export_chat_history(
|
|
| 291 |
if resolved_user is None:
|
| 292 |
raise HTTPException(status_code=401, detail="Authentication required")
|
| 293 |
|
| 294 |
-
if format not in ("md", "txt"):
|
| 295 |
-
raise HTTPException(status_code=400, detail="Format must be 'md' or '
|
| 296 |
|
| 297 |
# Verify document exists and belongs to user
|
| 298 |
doc = db.query(Document).filter(
|
|
@@ -320,15 +326,18 @@ def export_chat_history(
|
|
| 320 |
content = _format_markdown(doc, messages)
|
| 321 |
media_type = "text/markdown"
|
| 322 |
extension = "md"
|
| 323 |
-
|
| 324 |
content = _format_plaintext(doc, messages)
|
| 325 |
media_type = "text/plain"
|
| 326 |
extension = "txt"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
|
| 328 |
safe_name = doc.original_name.rsplit(".", 1)[0]
|
| 329 |
filename = f"{safe_name}_chat_history.{extension}"
|
| 330 |
|
| 331 |
-
from fastapi.responses import Response
|
| 332 |
return Response(
|
| 333 |
content=content,
|
| 334 |
media_type=media_type,
|
|
@@ -527,3 +536,80 @@ def _format_plaintext(doc, messages) -> str:
|
|
| 527 |
|
| 528 |
return "\n".join(lines)
|
| 529 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
Chat routes — ask questions with RAG, stream responses via SSE, manage history.
|
| 3 |
"""
|
| 4 |
+
import html
|
| 5 |
import json
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
from io import BytesIO
|
| 8 |
import logging
|
| 9 |
from typing import Optional
|
| 10 |
|
| 11 |
from fastapi import APIRouter, Depends, HTTPException
|
| 12 |
+
from fastapi.responses import Response, StreamingResponse
|
| 13 |
+
from reportlab.lib.pagesizes import letter
|
| 14 |
+
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
|
| 15 |
+
from reportlab.lib.units import inch
|
| 16 |
+
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer
|
| 17 |
from sqlalchemy.orm import Session
|
| 18 |
|
| 19 |
from app.database import get_db
|
|
|
|
| 260 |
):
|
| 261 |
"""Export the chat history for a document as a downloadable file.
|
| 262 |
|
| 263 |
+
Supports Markdown (.md), plain text (.txt), or PDF (.pdf) export. The function accepts
|
| 264 |
authentication via either the standard `Authorization: Bearer <token>`
|
| 265 |
header (handled by the dependency chain) or a `token` query parameter to
|
| 266 |
facilitate browser-initiated downloads that cannot set custom headers.
|
| 267 |
|
| 268 |
Args:
|
| 269 |
document_id: The unique identifier of the document whose chat history is to be exported.
|
| 270 |
+
format: Output format, either "md" (Markdown), "txt" (plain text), or "pdf". Defaults to "md".
|
| 271 |
token: Optional JWT token passed as a query parameter. Used for browser
|
| 272 |
downloads when the `Authorization` header is not available.
|
| 273 |
db: SQLAlchemy database session, obtained from the dependency.
|
| 274 |
|
| 275 |
Returns:
|
| 276 |
Response: A FastAPI `Response` object with:
|
| 277 |
+
- `content`: Formatted chat history as a string or PDF bytes.
|
| 278 |
+
- `media_type`: `text/markdown`, `text/plain`, or `application/pdf`.
|
| 279 |
- `headers`: `Content-Disposition` attachment header with a generated filename.
|
| 280 |
|
| 281 |
Raises:
|
| 282 |
HTTPException: 401 if neither the token query parameter nor a valid
|
| 283 |
bearer token provides an authenticated user.
|
| 284 |
+
HTTPException: 400 if the `format` parameter is not "md", "txt", or "pdf".
|
| 285 |
HTTPException: 404 if the document does not exist or does not belong
|
| 286 |
to the user, or if no chat messages are found for the document.
|
| 287 |
"""
|
|
|
|
| 288 |
from app.auth import decode_token as _decode
|
| 289 |
|
| 290 |
# Resolve user from query-param token (browser download links can't set headers)
|
|
|
|
| 297 |
if resolved_user is None:
|
| 298 |
raise HTTPException(status_code=401, detail="Authentication required")
|
| 299 |
|
| 300 |
+
if format not in ("md", "txt", "pdf"):
|
| 301 |
+
raise HTTPException(status_code=400, detail="Format must be 'md', 'txt', or 'pdf'")
|
| 302 |
|
| 303 |
# Verify document exists and belongs to user
|
| 304 |
doc = db.query(Document).filter(
|
|
|
|
| 326 |
content = _format_markdown(doc, messages)
|
| 327 |
media_type = "text/markdown"
|
| 328 |
extension = "md"
|
| 329 |
+
elif format == "txt":
|
| 330 |
content = _format_plaintext(doc, messages)
|
| 331 |
media_type = "text/plain"
|
| 332 |
extension = "txt"
|
| 333 |
+
else:
|
| 334 |
+
content = _format_pdf(doc, messages)
|
| 335 |
+
media_type = "application/pdf"
|
| 336 |
+
extension = "pdf"
|
| 337 |
|
| 338 |
safe_name = doc.original_name.rsplit(".", 1)[0]
|
| 339 |
filename = f"{safe_name}_chat_history.{extension}"
|
| 340 |
|
|
|
|
| 341 |
return Response(
|
| 342 |
content=content,
|
| 343 |
media_type=media_type,
|
|
|
|
| 536 |
|
| 537 |
return "\n".join(lines)
|
| 538 |
|
| 539 |
+
|
| 540 |
+
def _format_pdf(doc, messages) -> bytes:
    """Format chat history as a PDF document.

    Builds a ReportLab "platypus" story in memory: a title and metadata
    header followed by one section per message (role, timestamp, content
    and, for assistant messages, the cited sources).

    Args:
        doc: Document record; only ``original_name`` is read.
        messages: Iterable (with ``len()``) of message records. For each
            message ``role``, ``content``, ``created_at`` and
            ``sources_json`` are read.

    Returns:
        The rendered PDF file as raw bytes.
    """
    log = logging.getLogger(__name__)

    buffer = BytesIO()
    pdf = SimpleDocTemplate(
        buffer,
        pagesize=letter,
        leftMargin=0.75 * inch,
        rightMargin=0.75 * inch,
        topMargin=0.75 * inch,
        bottomMargin=0.75 * inch,
    )

    styles = getSampleStyleSheet()
    # Derive a dedicated style rather than mutating the stock "Normal" entry,
    # consistent with how the content and source styles are built below.
    metadata_style = ParagraphStyle(
        "ChatMetadata",
        parent=styles["Normal"],
        spaceAfter=6,
    )
    content_style = ParagraphStyle(
        "ChatContent",
        parent=styles["BodyText"],
        leading=14,
        spaceAfter=10,
    )
    source_style = ParagraphStyle(
        "ChatSource",
        parent=styles["BodyText"],
        leftIndent=14,
        leading=12,
        spaceAfter=4,
    )

    # Paragraph text is parsed as markup, so every dynamic value is escaped.
    doc_name = html.escape(doc.original_name)
    story = [
        Paragraph(f"Chat History - {doc_name}", styles["Title"]),
        Spacer(1, 0.15 * inch),
        Paragraph(f"Document: {doc_name}", metadata_style),
        Paragraph(f"Exported at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", metadata_style),
        Paragraph(f"Total messages: {len(messages)}", metadata_style),
        Spacer(1, 0.2 * inch),
    ]

    for msg in messages:
        timestamp = msg.created_at.strftime("%Y-%m-%d %H:%M:%S") if msg.created_at else ""
        role_label = "You" if msg.role == "user" else "Assistant"

        story.append(Paragraph(f"<b>{html.escape(role_label)}</b>", styles["Heading3"]))
        story.append(Paragraph(html.escape(timestamp), styles["Italic"]))
        story.append(Paragraph(_pdf_text(msg.content), content_style))

        if msg.role == "assistant" and msg.sources_json:
            # Sources are best-effort: a malformed payload must not abort the
            # export, but it is logged instead of being silently discarded.
            try:
                sources = json.loads(msg.sources_json)
                if sources:
                    story.append(Paragraph("<b>Sources:</b>", metadata_style))
                    for i, src in enumerate(sources, 1):
                        filename = html.escape(str(src.get("filename", "Unknown")))
                        page = html.escape(str(src.get("page", "?")))
                        confidence = html.escape(str(src.get("confidence", 0)))
                        story.append(
                            Paragraph(
                                f"[{i}] {filename}, Page {page} (Confidence: {confidence}%)",
                                source_style,
                            )
                        )
                        text_preview = str(src.get("text", "")).strip()
                        if text_preview:
                            story.append(Paragraph(_pdf_text(text_preview), source_style))
            except (ValueError, TypeError, AttributeError):
                # ValueError covers json.JSONDecodeError; TypeError and
                # AttributeError cover payloads that are not a list of dicts.
                log.warning(
                    "Skipping malformed sources while exporting chat history to PDF",
                    exc_info=True,
                )

        story.append(Spacer(1, 0.15 * inch))

    pdf.build(story)
    return buffer.getvalue()
|
| 611 |
+
|
| 612 |
+
|
| 613 |
+
def _pdf_text(text: str) -> str:
|
| 614 |
+
"""Escape text for ReportLab paragraphs while preserving line breaks."""
|
| 615 |
+
return html.escape(text or "").replace("\n", "<br/>")
|
backend/requirements.txt
CHANGED
|
@@ -48,3 +48,4 @@ python-magic-bin==0.4.27; sys_platform == "win32" # for windows
|
|
| 48 |
python-magic; sys_platform != "win32"
|
| 49 |
python-docx
|
| 50 |
pypdf
|
|
|
|
|
|
| 48 |
python-magic; sys_platform != "win32"
|
| 49 |
python-docx
|
| 50 |
pypdf
|
| 51 |
+
reportlab
|
frontend/src/components/chat/ChatPanel.tsx
CHANGED
|
@@ -211,7 +211,7 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
|
|
| 211 |
}
|
| 212 |
};
|
| 213 |
|
| 214 |
-
const handleExport = (format: "md" | "txt") => {
|
| 215 |
if (!activeDoc) return;
|
| 216 |
setShowExportMenu(false);
|
| 217 |
const token = localStorage.getItem("token");
|
|
@@ -350,6 +350,14 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
|
|
| 350 |
<span className="text-base">📄</span>
|
| 351 |
Plain Text (.txt)
|
| 352 |
</button>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
</div>
|
| 354 |
)}
|
| 355 |
</div>
|
|
@@ -369,4 +377,4 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
|
|
| 369 |
</div>
|
| 370 |
</div>
|
| 371 |
);
|
| 372 |
-
}
|
|
|
|
| 211 |
}
|
| 212 |
};
|
| 213 |
|
| 214 |
+
const handleExport = (format: "md" | "txt" | "pdf") => {
|
| 215 |
if (!activeDoc) return;
|
| 216 |
setShowExportMenu(false);
|
| 217 |
const token = localStorage.getItem("token");
|
|
|
|
| 350 |
<span className="text-base">📄</span>
|
| 351 |
Plain Text (.txt)
|
| 352 |
</button>
|
| 353 |
+
<button
|
| 354 |
+
id="export-pdf-btn"
|
| 355 |
+
onClick={() => handleExport("pdf")}
|
| 356 |
+
className="w-full flex items-center gap-2 rounded-md px-3 py-2 text-sm hover:bg-accent transition-colors text-left"
|
| 357 |
+
>
|
| 358 |
+
<span className="text-base">📕</span>
|
| 359 |
+
PDF (.pdf)
|
| 360 |
+
</button>
|
| 361 |
</div>
|
| 362 |
)}
|
| 363 |
</div>
|
|
|
|
| 377 |
</div>
|
| 378 |
</div>
|
| 379 |
);
|
| 380 |
+
}
|