Spaces:
Sleeping
Sleeping
Commit ·
965240e
1
Parent(s): 819b39a
Added limit checks for the Groq free-tier API
Browse files- README.md +1 -0
- backend/app/api/routes_chat.py +14 -0
- backend/app/api/routes_docs.py +23 -0
- backend/app/config.py +1 -0
- frontend/app/page.tsx +143 -20
README.md
CHANGED
|
@@ -34,6 +34,7 @@ The system goes beyond naive vector similarity by incorporating concept co-occur
|
|
| 34 |
- **Citation-Aware Responses** — Grounded answers with source attribution
|
| 35 |
- 🧩 **Conversation Memory** — Short-term context retention across turns
|
| 36 |
- ✏️ **Query Rewriting** — Context-aware reformulation using chat history
|
|
|
|
| 37 |
- **Evaluation Framework** — Built-in retrieval quality assessment
|
| 38 |
- 🧪 **Ablation Studies** — Baseline comparisons and performance validation
|
| 39 |
|
|
|
|
| 34 |
- **Citation-Aware Responses** — Grounded answers with source attribution
|
| 35 |
- 🧩 **Conversation Memory** — Short-term context retention across turns
|
| 36 |
- ✏️ **Query Rewriting** — Context-aware reformulation using chat history
|
| 37 |
+
- ⚡ **Token Limit Protection** — Automatic document size validation to prevent API errors
|
| 38 |
- **Evaluation Framework** — Built-in retrieval quality assessment
|
| 39 |
- 🧪 **Ablation Studies** — Baseline comparisons and performance validation
|
| 40 |
|
backend/app/api/routes_chat.py
CHANGED
|
@@ -38,7 +38,21 @@ def chat(request: ChatRequest) -> ChatResponse:
|
|
| 38 |
citations=[],
|
| 39 |
)
|
| 40 |
|
|
|
|
| 41 |
context = "\n\n".join(chunk.text for chunk in chunks)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
messages = build_summary_prompt(context)
|
| 43 |
|
| 44 |
answer = llm_chat(messages=messages)
|
|
|
|
| 38 |
citations=[],
|
| 39 |
)
|
| 40 |
|
| 41 |
+
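# Reject oversized summaries up front: estimate tokens (~4 characters each) and compare against settings.max_summary_tokens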
|
| 42 |
context = "\n\n".join(chunk.text for chunk in chunks)
|
| 43 |
+
estimated_tokens = len(context) // 4
|
| 44 |
+
|
| 45 |
+
from app.config import settings
|
| 46 |
+
|
| 47 |
+
if estimated_tokens > settings.max_summary_tokens:
|
| 48 |
+
return ChatResponse(
|
| 49 |
+
answer=f"The selected documents are too large \
|
| 50 |
+
to summarize ({estimated_tokens:,} tokens). "
|
| 51 |
+
f"Maximum allowed: {settings.max_summary_tokens:,} tokens. "
|
| 52 |
+
f"Please select fewer documents or upload smaller PDFs.",
|
| 53 |
+
citations=[],
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
messages = build_summary_prompt(context)
|
| 57 |
|
| 58 |
answer = llm_chat(messages=messages)
|
backend/app/api/routes_docs.py
CHANGED
|
@@ -105,3 +105,26 @@ def list_documents() -> dict:
|
|
| 105 |
"total_chunks": len(chunks),
|
| 106 |
"doc_ids": doc_ids,
|
| 107 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
"total_chunks": len(chunks),
|
| 106 |
"doc_ids": doc_ids,
|
| 107 |
}
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
@router.get("/token-counts")
|
| 111 |
+
def get_document_token_counts() -> dict:
|
| 112 |
+
"""Get approximate token counts for all documents.
|
| 113 |
+
|
| 114 |
+
Returns:
|
| 115 |
+
Dictionary with doc_id -> token_count mapping and max limit
|
| 116 |
+
"""
|
| 117 |
+
from app.retrieval.chunk_registry import get_chunks
|
| 118 |
+
|
| 119 |
+
chunks = get_chunks()
|
| 120 |
+
doc_token_counts = {}
|
| 121 |
+
|
| 122 |
+
for chunk in chunks:
|
| 123 |
+
# Rough estimate: 1 token ≈ 4 characters
|
| 124 |
+
tokens = len(chunk.text) // 4
|
| 125 |
+
doc_token_counts[chunk.doc_id] = doc_token_counts.get(chunk.doc_id, 0) + tokens
|
| 126 |
+
|
| 127 |
+
return {
|
| 128 |
+
"doc_token_counts": doc_token_counts,
|
| 129 |
+
"max_summary_tokens": settings.max_summary_tokens,
|
| 130 |
+
}
|
backend/app/config.py
CHANGED
|
@@ -10,6 +10,7 @@ class Settings(BaseSettings):
|
|
| 10 |
default_model: str = "openai/gpt-oss-120b"
|
| 11 |
qdrant_path: str = "/tmp/qdrant"
|
| 12 |
docs_path: str = "/tmp/docs"
|
|
|
|
| 13 |
|
| 14 |
class Config:
|
| 15 |
"""Pydantic Settings configuration."""
|
|
|
|
| 10 |
default_model: str = "openai/gpt-oss-120b"
|
| 11 |
qdrant_path: str = "/tmp/qdrant"
|
| 12 |
docs_path: str = "/tmp/docs"
|
| 13 |
+
max_summary_tokens: int = 6000 # Conservative limit for model openai/gpt-oss-120b
|
| 14 |
|
| 15 |
class Config:
|
| 16 |
"""Pydantic Settings configuration."""
|
frontend/app/page.tsx
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
'use client'
|
| 2 |
|
| 3 |
import React, { useState, useEffect, useRef } from 'react';
|
| 4 |
-
import { Upload, Send, FileText, MessageSquare, Loader2, AlertCircle, Trash2 } from 'lucide-react';
|
| 5 |
|
| 6 |
// Add custom scrollbar styles
|
| 7 |
const customScrollbarStyles = `
|
|
@@ -47,6 +47,11 @@ interface UploadedDocument {
|
|
| 47 |
uploadedAt: string;
|
| 48 |
}
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
type ChatMode = 'qa' | 'summarize';
|
| 51 |
|
| 52 |
// API SERVICE
|
|
@@ -82,6 +87,16 @@ class ApiService {
|
|
| 82 |
return response.json();
|
| 83 |
}
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
static async chat(
|
| 86 |
query: string,
|
| 87 |
mode: ChatMode,
|
|
@@ -529,11 +544,21 @@ function ChatInterface({
|
|
| 529 |
const [loading, setLoading] = useState(false);
|
| 530 |
const [error, setError] = useState<string | null>(null);
|
| 531 |
const [selectedDocIds, setSelectedDocIds] = useState<string[]>([]);
|
|
|
|
| 532 |
const chatEndRef = useRef<HTMLDivElement>(null);
|
| 533 |
|
| 534 |
const messages = mode === 'qa' ? qaMessages : summarizeMessages;
|
| 535 |
const setMessages = mode === 'qa' ? setQaMessages : setSummarizeMessages;
|
| 536 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 537 |
useEffect(() => {
|
| 538 |
if (documents.length > 0 && selectedDocIds.length === 0) {
|
| 539 |
setSelectedDocIds(documents.map(d => d.doc_id));
|
|
@@ -631,7 +656,36 @@ function ChatInterface({
|
|
| 631 |
}
|
| 632 |
};
|
| 633 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 634 |
const toggleDocSelection = (docId: string) => {
|
|
|
|
|
|
|
| 635 |
setSelectedDocIds(prev =>
|
| 636 |
prev.includes(docId)
|
| 637 |
? prev.filter(id => id !== docId)
|
|
@@ -640,7 +694,25 @@ function ChatInterface({
|
|
| 640 |
};
|
| 641 |
|
| 642 |
const selectAllDocs = () => {
|
| 643 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 644 |
};
|
| 645 |
|
| 646 |
const deselectAllDocs = () => {
|
|
@@ -654,6 +726,10 @@ function ChatInterface({
|
|
| 654 |
}
|
| 655 |
};
|
| 656 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 657 |
return (
|
| 658 |
<div className="flex flex-col h-full">
|
| 659 |
<div className="flex-shrink-0 border-b border-gray-200 bg-white p-4 shadow-sm">
|
|
@@ -723,7 +799,9 @@ function ChatInterface({
|
|
| 723 |
<div className="flex-1 overflow-y-auto p-6 space-y-4 bg-gradient-to-b from-gray-50 to-white custom-scrollbar">
|
| 724 |
{messages.length === 0 && (
|
| 725 |
<div className="h-full flex items-center justify-center text-gray-400">
|
| 726 |
-
|
|
|
|
|
|
|
| 727 |
{mode === 'qa' ? (
|
| 728 |
<>
|
| 729 |
<div className="w-20 h-20 mx-auto mb-4 bg-blue-100 rounded-full flex items-center justify-center">
|
|
@@ -835,26 +913,71 @@ function ChatInterface({
|
|
| 835 |
</div>
|
| 836 |
</div>
|
| 837 |
<div className="space-y-2 max-h-48 overflow-y-auto pr-2 custom-scrollbar">
|
| 838 |
-
{documents.map(doc =>
|
| 839 |
-
|
| 840 |
-
|
| 841 |
-
|
| 842 |
-
|
| 843 |
-
<
|
| 844 |
-
|
| 845 |
-
|
| 846 |
-
|
| 847 |
-
|
| 848 |
-
|
| 849 |
-
|
| 850 |
-
|
| 851 |
-
|
| 852 |
-
|
| 853 |
-
|
| 854 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 855 |
</div>
|
| 856 |
</div>
|
| 857 |
)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 858 |
<button
|
| 859 |
onClick={handleSummarize}
|
| 860 |
disabled={loading || !documentsUploaded || selectedDocIds.length === 0}
|
|
|
|
| 1 |
'use client'
|
| 2 |
|
| 3 |
import React, { useState, useEffect, useRef } from 'react';
|
| 4 |
+
import { Upload, Send, FileText, MessageSquare, Loader2, AlertCircle, Trash2, AlertTriangle } from 'lucide-react';
|
| 5 |
|
| 6 |
// Add custom scrollbar styles
|
| 7 |
const customScrollbarStyles = `
|
|
|
|
| 47 |
uploadedAt: string;
|
| 48 |
}
|
| 49 |
|
| 50 |
+
interface TokenCounts {
|
| 51 |
+
doc_token_counts: Record<string, number>;
|
| 52 |
+
max_summary_tokens: number;
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
type ChatMode = 'qa' | 'summarize';
|
| 56 |
|
| 57 |
// API SERVICE
|
|
|
|
| 87 |
return response.json();
|
| 88 |
}
|
| 89 |
|
| 90 |
+
static async getTokenCounts(): Promise<TokenCounts> {
|
| 91 |
+
const response = await fetch(`${API_BASE}/docs/token-counts`);
|
| 92 |
+
|
| 93 |
+
if (!response.ok) {
|
| 94 |
+
throw new Error(`Failed to fetch token counts: ${response.statusText}`);
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
return response.json();
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
static async chat(
|
| 101 |
query: string,
|
| 102 |
mode: ChatMode,
|
|
|
|
| 544 |
const [loading, setLoading] = useState(false);
|
| 545 |
const [error, setError] = useState<string | null>(null);
|
| 546 |
const [selectedDocIds, setSelectedDocIds] = useState<string[]>([]);
|
| 547 |
+
const [tokenCounts, setTokenCounts] = useState<TokenCounts | null>(null);
|
| 548 |
const chatEndRef = useRef<HTMLDivElement>(null);
|
| 549 |
|
| 550 |
const messages = mode === 'qa' ? qaMessages : summarizeMessages;
|
| 551 |
const setMessages = mode === 'qa' ? setQaMessages : setSummarizeMessages;
|
| 552 |
|
| 553 |
+
// Fetch token counts when documents change
|
| 554 |
+
useEffect(() => {
|
| 555 |
+
if (documents.length > 0) {
|
| 556 |
+
ApiService.getTokenCounts()
|
| 557 |
+
.then(setTokenCounts)
|
| 558 |
+
.catch(err => console.error('Failed to fetch token counts:', err));
|
| 559 |
+
}
|
| 560 |
+
}, [documents]);
|
| 561 |
+
|
| 562 |
useEffect(() => {
|
| 563 |
if (documents.length > 0 && selectedDocIds.length === 0) {
|
| 564 |
setSelectedDocIds(documents.map(d => d.doc_id));
|
|
|
|
| 656 |
}
|
| 657 |
};
|
| 658 |
|
| 659 |
+
const getSelectedTokenCount = (): number => {
|
| 660 |
+
if (!tokenCounts) return 0;
|
| 661 |
+
return selectedDocIds.reduce((sum, docId) => {
|
| 662 |
+
return sum + (tokenCounts.doc_token_counts[docId] || 0);
|
| 663 |
+
}, 0);
|
| 664 |
+
};
|
| 665 |
+
|
| 666 |
+
const canSelectDoc = (docId: string): boolean => {
|
| 667 |
+
if (!tokenCounts || mode !== 'summarize') return true;
|
| 668 |
+
|
| 669 |
+
const currentTokens = getSelectedTokenCount();
|
| 670 |
+
const docTokens = tokenCounts.doc_token_counts[docId] || 0;
|
| 671 |
+
|
| 672 |
+
// If already selected, can always deselect
|
| 673 |
+
if (selectedDocIds.includes(docId)) return true;
|
| 674 |
+
|
| 675 |
+
// Check if adding this doc would exceed limit
|
| 676 |
+
return (currentTokens + docTokens) <= tokenCounts.max_summary_tokens;
|
| 677 |
+
};
|
| 678 |
+
|
| 679 |
+
const getDocTokenInfo = (docId: string): { tokens: number; tooLarge: boolean } => {
|
| 680 |
+
if (!tokenCounts) return { tokens: 0, tooLarge: false };
|
| 681 |
+
const tokens = tokenCounts.doc_token_counts[docId] || 0;
|
| 682 |
+
const tooLarge = tokens > tokenCounts.max_summary_tokens;
|
| 683 |
+
return { tokens, tooLarge };
|
| 684 |
+
};
|
| 685 |
+
|
| 686 |
const toggleDocSelection = (docId: string) => {
|
| 687 |
+
if (!canSelectDoc(docId)) return;
|
| 688 |
+
|
| 689 |
setSelectedDocIds(prev =>
|
| 690 |
prev.includes(docId)
|
| 691 |
? prev.filter(id => id !== docId)
|
|
|
|
| 694 |
};
|
| 695 |
|
| 696 |
const selectAllDocs = () => {
|
| 697 |
+
if (!tokenCounts || mode !== 'summarize') {
|
| 698 |
+
setSelectedDocIds(documents.map(d => d.doc_id));
|
| 699 |
+
return;
|
| 700 |
+
}
|
| 701 |
+
|
| 702 |
+
// Add docs one by one until limit reached
|
| 703 |
+
const validDocs: string[] = [];
|
| 704 |
+
let totalTokens = 0;
|
| 705 |
+
|
| 706 |
+
for (const doc of documents) {
|
| 707 |
+
const docTokens = tokenCounts.doc_token_counts[doc.doc_id] || 0;
|
| 708 |
+
if (docTokens > tokenCounts.max_summary_tokens) continue;
|
| 709 |
+
if (totalTokens + docTokens <= tokenCounts.max_summary_tokens) {
|
| 710 |
+
validDocs.push(doc.doc_id);
|
| 711 |
+
totalTokens += docTokens;
|
| 712 |
+
}
|
| 713 |
+
}
|
| 714 |
+
|
| 715 |
+
setSelectedDocIds(validDocs);
|
| 716 |
};
|
| 717 |
|
| 718 |
const deselectAllDocs = () => {
|
|
|
|
| 726 |
}
|
| 727 |
};
|
| 728 |
|
| 729 |
+
const selectedTokens = getSelectedTokenCount();
|
| 730 |
+
const maxTokens = tokenCounts?.max_summary_tokens || 0;
|
| 731 |
+
const tokenPercentage = maxTokens > 0 ? (selectedTokens / maxTokens) * 100 : 0;
|
| 732 |
+
|
| 733 |
return (
|
| 734 |
<div className="flex flex-col h-full">
|
| 735 |
<div className="flex-shrink-0 border-b border-gray-200 bg-white p-4 shadow-sm">
|
|
|
|
| 799 |
<div className="flex-1 overflow-y-auto p-6 space-y-4 bg-gradient-to-b from-gray-50 to-white custom-scrollbar">
|
| 800 |
{messages.length === 0 && (
|
| 801 |
<div className="h-full flex items-center justify-center text-gray-400">
|
| 802 |
+
|
| 803 |
+
|
| 804 |
+
<div className="text-center p-8">
|
| 805 |
{mode === 'qa' ? (
|
| 806 |
<>
|
| 807 |
<div className="w-20 h-20 mx-auto mb-4 bg-blue-100 rounded-full flex items-center justify-center">
|
|
|
|
| 913 |
</div>
|
| 914 |
</div>
|
| 915 |
<div className="space-y-2 max-h-48 overflow-y-auto pr-2 custom-scrollbar">
|
| 916 |
+
{documents.map(doc => {
|
| 917 |
+
const { tokens, tooLarge } = getDocTokenInfo(doc.doc_id);
|
| 918 |
+
const selectable = canSelectDoc(doc.doc_id);
|
| 919 |
+
|
| 920 |
+
return (
|
| 921 |
+
<label
|
| 922 |
+
key={doc.doc_id}
|
| 923 |
+
className={`flex items-center gap-3 p-3 rounded-lg cursor-pointer transition-colors border ${
|
| 924 |
+
!selectable && !selectedDocIds.includes(doc.doc_id)
|
| 925 |
+
? 'opacity-50 cursor-not-allowed bg-gray-100 border-gray-300'
|
| 926 |
+
: 'hover:bg-white border-transparent hover:border-blue-200'
|
| 927 |
+
}`}
|
| 928 |
+
title={!selectable ? `Adding this document would exceed token limit (${tokens.toLocaleString()} tokens)` : ''}
|
| 929 |
+
>
|
| 930 |
+
<input
|
| 931 |
+
type="checkbox"
|
| 932 |
+
checked={selectedDocIds.includes(doc.doc_id)}
|
| 933 |
+
onChange={() => toggleDocSelection(doc.doc_id)}
|
| 934 |
+
disabled={!selectable && !selectedDocIds.includes(doc.doc_id)}
|
| 935 |
+
className="w-4 h-4 text-blue-600 rounded focus:ring-2 focus:ring-blue-500 disabled:cursor-not-allowed"
|
| 936 |
+
/>
|
| 937 |
+
<FileText className={`w-4 h-4 flex-shrink-0 ${tooLarge ? 'text-red-500' : 'text-blue-600'}`} />
|
| 938 |
+
<div className="flex-1 min-w-0">
|
| 939 |
+
<span className="text-sm text-gray-700 truncate block font-medium">
|
| 940 |
+
{doc.filename}
|
| 941 |
+
</span>
|
| 942 |
+
{tooLarge && (
|
| 943 |
+
<span className="text-xs text-red-600 flex items-center gap-1 mt-1">
|
| 944 |
+
<AlertTriangle className="w-3 h-3" />
|
| 945 |
+
Too large ({tokens.toLocaleString()} tokens, max: {maxTokens.toLocaleString()})
|
| 946 |
+
</span>
|
| 947 |
+
)}
|
| 948 |
+
</div>
|
| 949 |
+
</label>
|
| 950 |
+
);
|
| 951 |
+
})}
|
| 952 |
</div>
|
| 953 |
</div>
|
| 954 |
)}
|
| 955 |
+
|
| 956 |
+
{mode === 'summarize' && tokenCounts && selectedDocIds.length > 0 && (
|
| 957 |
+
<div className="bg-blue-50 border border-blue-200 rounded-xl p-4">
|
| 958 |
+
<div className="flex items-center justify-between mb-2">
|
| 959 |
+
<span className="text-sm font-medium text-gray-700">Token Usage</span>
|
| 960 |
+
<span className="text-sm font-bold text-blue-600">
|
| 961 |
+
{selectedTokens.toLocaleString()} / {maxTokens.toLocaleString()}
|
| 962 |
+
</span>
|
| 963 |
+
</div>
|
| 964 |
+
<div className="w-full bg-gray-200 rounded-full h-2">
|
| 965 |
+
<div
|
| 966 |
+
className={`h-2 rounded-full transition-all ${
|
| 967 |
+
tokenPercentage > 90 ? 'bg-red-500' : tokenPercentage > 70 ? 'bg-yellow-500' : 'bg-blue-600'
|
| 968 |
+
}`}
|
| 969 |
+
style={{ width: `${Math.min(tokenPercentage, 100)}%` }}
|
| 970 |
+
/>
|
| 971 |
+
</div>
|
| 972 |
+
{tokenPercentage > 90 && (
|
| 973 |
+
<p className="text-xs text-red-600 mt-2 flex items-center gap-1">
|
| 974 |
+
<AlertTriangle className="w-3 h-3" />
|
| 975 |
+
Warning: Approaching token limit. Consider deselecting some documents.
|
| 976 |
+
</p>
|
| 977 |
+
)}
|
| 978 |
+
</div>
|
| 979 |
+
)}
|
| 980 |
+
|
| 981 |
<button
|
| 982 |
onClick={handleSummarize}
|
| 983 |
disabled={loading || !documentsUploaded || selectedDocIds.length === 0}
|