SanskarModi commited on
Commit
965240e
·
1 Parent(s): 819b39a

Added limit checks for Groq free tier API

Browse files
README.md CHANGED
@@ -34,6 +34,7 @@ The system goes beyond naive vector similarity by incorporating concept co-occur
34
  - πŸ“š **Citation-Aware Responses** – Grounded answers with source attribution
35
  - 🧩 **Conversation Memory** – Short-term context retention across turns
36
  - ✏️ **Query Rewriting** – Context-aware reformulation using chat history
 
37
  - πŸ” **Evaluation Framework** – Built-in retrieval quality assessment
38
  - πŸ§ͺ **Ablation Studies** – Baseline comparisons and performance validation
39
 
 
34
  - πŸ“š **Citation-Aware Responses** – Grounded answers with source attribution
35
  - 🧩 **Conversation Memory** – Short-term context retention across turns
36
  - ✏️ **Query Rewriting** – Context-aware reformulation using chat history
37
+ - ⚡ **Token Limit Protection** – Automatic document size validation to prevent API errors
38
  - πŸ” **Evaluation Framework** – Built-in retrieval quality assessment
39
  - πŸ§ͺ **Ablation Studies** – Baseline comparisons and performance validation
40
 
backend/app/api/routes_chat.py CHANGED
@@ -38,7 +38,21 @@ def chat(request: ChatRequest) -> ChatResponse:
38
  citations=[],
39
  )
40
 
 
41
  context = "\n\n".join(chunk.text for chunk in chunks)
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  messages = build_summary_prompt(context)
43
 
44
  answer = llm_chat(messages=messages)
 
38
  citations=[],
39
  )
40
 
41
+ # ADD TOKEN CHECK HERE
42
  context = "\n\n".join(chunk.text for chunk in chunks)
43
+ estimated_tokens = len(context) // 4
44
+
45
+ from app.config import settings
46
+
47
+ if estimated_tokens > settings.max_summary_tokens:
48
+ return ChatResponse(
49
+ answer=f"The selected documents are too large \
50
+ to summarize ({estimated_tokens:,} tokens). "
51
+ f"Maximum allowed: {settings.max_summary_tokens:,} tokens. "
52
+ f"Please select fewer documents or upload smaller PDFs.",
53
+ citations=[],
54
+ )
55
+
56
  messages = build_summary_prompt(context)
57
 
58
  answer = llm_chat(messages=messages)
backend/app/api/routes_docs.py CHANGED
@@ -105,3 +105,26 @@ def list_documents() -> dict:
105
  "total_chunks": len(chunks),
106
  "doc_ids": doc_ids,
107
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  "total_chunks": len(chunks),
106
  "doc_ids": doc_ids,
107
  }
108
+
109
+
110
@router.get("/token-counts")
def get_document_token_counts() -> dict:
    """Get approximate token counts for all documents.

    Counts are estimated with the rough heuristic of one token per four
    characters of chunk text, summed per document.

    Returns:
        Dictionary with doc_id -> token_count mapping and max limit
    """
    # Local import mirrors the endpoint's original style; presumably this
    # avoids an import cycle at module load time — TODO confirm.
    from app.retrieval.chunk_registry import get_chunks

    totals: dict = {}
    for chunk in get_chunks():
        # Rough estimate: 1 token is ~4 characters of text.
        totals[chunk.doc_id] = totals.get(chunk.doc_id, 0) + len(chunk.text) // 4

    return {
        "doc_token_counts": totals,
        "max_summary_tokens": settings.max_summary_tokens,
    }
backend/app/config.py CHANGED
@@ -10,6 +10,7 @@ class Settings(BaseSettings):
10
  default_model: str = "openai/gpt-oss-120b"
11
  qdrant_path: str = "/tmp/qdrant"
12
  docs_path: str = "/tmp/docs"
 
13
 
14
  class Config:
15
  """Pydantic Settings configuration."""
 
10
  default_model: str = "openai/gpt-oss-120b"
11
  qdrant_path: str = "/tmp/qdrant"
12
  docs_path: str = "/tmp/docs"
13
+ max_summary_tokens: int = 6000 # Conservative limit for model openai/gpt-oss-120b
14
 
15
  class Config:
16
  """Pydantic Settings configuration."""
frontend/app/page.tsx CHANGED
@@ -1,7 +1,7 @@
1
  'use client'
2
 
3
  import React, { useState, useEffect, useRef } from 'react';
4
- import { Upload, Send, FileText, MessageSquare, Loader2, AlertCircle, Trash2 } from 'lucide-react';
5
 
6
  // Add custom scrollbar styles
7
  const customScrollbarStyles = `
@@ -47,6 +47,11 @@ interface UploadedDocument {
47
  uploadedAt: string;
48
  }
49
 
 
 
 
 
 
50
  type ChatMode = 'qa' | 'summarize';
51
 
52
  // API SERVICE
@@ -82,6 +87,16 @@ class ApiService {
82
  return response.json();
83
  }
84
 
 
 
 
 
 
 
 
 
 
 
85
  static async chat(
86
  query: string,
87
  mode: ChatMode,
@@ -529,11 +544,21 @@ function ChatInterface({
529
  const [loading, setLoading] = useState(false);
530
  const [error, setError] = useState<string | null>(null);
531
  const [selectedDocIds, setSelectedDocIds] = useState<string[]>([]);
 
532
  const chatEndRef = useRef<HTMLDivElement>(null);
533
 
534
  const messages = mode === 'qa' ? qaMessages : summarizeMessages;
535
  const setMessages = mode === 'qa' ? setQaMessages : setSummarizeMessages;
536
 
 
 
 
 
 
 
 
 
 
537
  useEffect(() => {
538
  if (documents.length > 0 && selectedDocIds.length === 0) {
539
  setSelectedDocIds(documents.map(d => d.doc_id));
@@ -631,7 +656,36 @@ function ChatInterface({
631
  }
632
  };
633
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
634
  const toggleDocSelection = (docId: string) => {
 
 
635
  setSelectedDocIds(prev =>
636
  prev.includes(docId)
637
  ? prev.filter(id => id !== docId)
@@ -640,7 +694,25 @@ function ChatInterface({
640
  };
641
 
642
  const selectAllDocs = () => {
643
- setSelectedDocIds(documents.map(d => d.doc_id));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
644
  };
645
 
646
  const deselectAllDocs = () => {
@@ -654,6 +726,10 @@ function ChatInterface({
654
  }
655
  };
656
 
 
 
 
 
657
  return (
658
  <div className="flex flex-col h-full">
659
  <div className="flex-shrink-0 border-b border-gray-200 bg-white p-4 shadow-sm">
@@ -723,7 +799,9 @@ function ChatInterface({
723
  <div className="flex-1 overflow-y-auto p-6 space-y-4 bg-gradient-to-b from-gray-50 to-white custom-scrollbar">
724
  {messages.length === 0 && (
725
  <div className="h-full flex items-center justify-center text-gray-400">
726
- <div className="text-center p-8">
 
 
727
  {mode === 'qa' ? (
728
  <>
729
  <div className="w-20 h-20 mx-auto mb-4 bg-blue-100 rounded-full flex items-center justify-center">
@@ -835,26 +913,71 @@ function ChatInterface({
835
  </div>
836
  </div>
837
  <div className="space-y-2 max-h-48 overflow-y-auto pr-2 custom-scrollbar">
838
- {documents.map(doc => (
839
- <label
840
- key={doc.doc_id}
841
- className="flex items-center gap-3 p-3 hover:bg-white rounded-lg cursor-pointer transition-colors border border-transparent hover:border-blue-200"
842
- >
843
- <input
844
- type="checkbox"
845
- checked={selectedDocIds.includes(doc.doc_id)}
846
- onChange={() => toggleDocSelection(doc.doc_id)}
847
- className="w-4 h-4 text-blue-600 rounded focus:ring-2 focus:ring-blue-500"
848
- />
849
- <FileText className="w-4 h-4 text-blue-600 flex-shrink-0" />
850
- <span className="text-sm text-gray-700 truncate flex-1 font-medium">
851
- {doc.filename}
852
- </span>
853
- </label>
854
- ))}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
855
  </div>
856
  </div>
857
  )}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
858
  <button
859
  onClick={handleSummarize}
860
  disabled={loading || !documentsUploaded || selectedDocIds.length === 0}
 
1
  'use client'
2
 
3
  import React, { useState, useEffect, useRef } from 'react';
4
+ import { Upload, Send, FileText, MessageSquare, Loader2, AlertCircle, Trash2, AlertTriangle } from 'lucide-react';
5
 
6
  // Add custom scrollbar styles
7
  const customScrollbarStyles = `
 
47
  uploadedAt: string;
48
  }
49
 
50
+ interface TokenCounts {
51
+ doc_token_counts: Record<string, number>;
52
+ max_summary_tokens: number;
53
+ }
54
+
55
  type ChatMode = 'qa' | 'summarize';
56
 
57
  // API SERVICE
 
87
  return response.json();
88
  }
89
 
90
+ static async getTokenCounts(): Promise<TokenCounts> {
91
+ const response = await fetch(`${API_BASE}/docs/token-counts`);
92
+
93
+ if (!response.ok) {
94
+ throw new Error(`Failed to fetch token counts: ${response.statusText}`);
95
+ }
96
+
97
+ return response.json();
98
+ }
99
+
100
  static async chat(
101
  query: string,
102
  mode: ChatMode,
 
544
  const [loading, setLoading] = useState(false);
545
  const [error, setError] = useState<string | null>(null);
546
  const [selectedDocIds, setSelectedDocIds] = useState<string[]>([]);
547
+ const [tokenCounts, setTokenCounts] = useState<TokenCounts | null>(null);
548
  const chatEndRef = useRef<HTMLDivElement>(null);
549
 
550
  const messages = mode === 'qa' ? qaMessages : summarizeMessages;
551
  const setMessages = mode === 'qa' ? setQaMessages : setSummarizeMessages;
552
 
553
+ // Fetch token counts when documents change
554
+ useEffect(() => {
555
+ if (documents.length > 0) {
556
+ ApiService.getTokenCounts()
557
+ .then(setTokenCounts)
558
+ .catch(err => console.error('Failed to fetch token counts:', err));
559
+ }
560
+ }, [documents]);
561
+
562
  useEffect(() => {
563
  if (documents.length > 0 && selectedDocIds.length === 0) {
564
  setSelectedDocIds(documents.map(d => d.doc_id));
 
656
  }
657
  };
658
 
659
+ const getSelectedTokenCount = (): number => {
660
+ if (!tokenCounts) return 0;
661
+ return selectedDocIds.reduce((sum, docId) => {
662
+ return sum + (tokenCounts.doc_token_counts[docId] || 0);
663
+ }, 0);
664
+ };
665
+
666
+ const canSelectDoc = (docId: string): boolean => {
667
+ if (!tokenCounts || mode !== 'summarize') return true;
668
+
669
+ const currentTokens = getSelectedTokenCount();
670
+ const docTokens = tokenCounts.doc_token_counts[docId] || 0;
671
+
672
+ // If already selected, can always deselect
673
+ if (selectedDocIds.includes(docId)) return true;
674
+
675
+ // Check if adding this doc would exceed limit
676
+ return (currentTokens + docTokens) <= tokenCounts.max_summary_tokens;
677
+ };
678
+
679
+ const getDocTokenInfo = (docId: string): { tokens: number; tooLarge: boolean } => {
680
+ if (!tokenCounts) return { tokens: 0, tooLarge: false };
681
+ const tokens = tokenCounts.doc_token_counts[docId] || 0;
682
+ const tooLarge = tokens > tokenCounts.max_summary_tokens;
683
+ return { tokens, tooLarge };
684
+ };
685
+
686
  const toggleDocSelection = (docId: string) => {
687
+ if (!canSelectDoc(docId)) return;
688
+
689
  setSelectedDocIds(prev =>
690
  prev.includes(docId)
691
  ? prev.filter(id => id !== docId)
 
694
  };
695
 
696
  const selectAllDocs = () => {
697
+ if (!tokenCounts || mode !== 'summarize') {
698
+ setSelectedDocIds(documents.map(d => d.doc_id));
699
+ return;
700
+ }
701
+
702
+ // Add docs one by one until limit reached
703
+ const validDocs: string[] = [];
704
+ let totalTokens = 0;
705
+
706
+ for (const doc of documents) {
707
+ const docTokens = tokenCounts.doc_token_counts[doc.doc_id] || 0;
708
+ if (docTokens > tokenCounts.max_summary_tokens) continue;
709
+ if (totalTokens + docTokens <= tokenCounts.max_summary_tokens) {
710
+ validDocs.push(doc.doc_id);
711
+ totalTokens += docTokens;
712
+ }
713
+ }
714
+
715
+ setSelectedDocIds(validDocs);
716
  };
717
 
718
  const deselectAllDocs = () => {
 
726
  }
727
  };
728
 
729
+ const selectedTokens = getSelectedTokenCount();
730
+ const maxTokens = tokenCounts?.max_summary_tokens || 0;
731
+ const tokenPercentage = maxTokens > 0 ? (selectedTokens / maxTokens) * 100 : 0;
732
+
733
  return (
734
  <div className="flex flex-col h-full">
735
  <div className="flex-shrink-0 border-b border-gray-200 bg-white p-4 shadow-sm">
 
799
  <div className="flex-1 overflow-y-auto p-6 space-y-4 bg-gradient-to-b from-gray-50 to-white custom-scrollbar">
800
  {messages.length === 0 && (
801
  <div className="h-full flex items-center justify-center text-gray-400">
802
+
803
+
804
+ <div className="text-center p-8">
805
  {mode === 'qa' ? (
806
  <>
807
  <div className="w-20 h-20 mx-auto mb-4 bg-blue-100 rounded-full flex items-center justify-center">
 
913
  </div>
914
  </div>
915
  <div className="space-y-2 max-h-48 overflow-y-auto pr-2 custom-scrollbar">
916
+ {documents.map(doc => {
917
+ const { tokens, tooLarge } = getDocTokenInfo(doc.doc_id);
918
+ const selectable = canSelectDoc(doc.doc_id);
919
+
920
+ return (
921
+ <label
922
+ key={doc.doc_id}
923
+ className={`flex items-center gap-3 p-3 rounded-lg cursor-pointer transition-colors border ${
924
+ !selectable && !selectedDocIds.includes(doc.doc_id)
925
+ ? 'opacity-50 cursor-not-allowed bg-gray-100 border-gray-300'
926
+ : 'hover:bg-white border-transparent hover:border-blue-200'
927
+ }`}
928
+ title={!selectable ? `Adding this document would exceed token limit (${tokens.toLocaleString()} tokens)` : ''}
929
+ >
930
+ <input
931
+ type="checkbox"
932
+ checked={selectedDocIds.includes(doc.doc_id)}
933
+ onChange={() => toggleDocSelection(doc.doc_id)}
934
+ disabled={!selectable && !selectedDocIds.includes(doc.doc_id)}
935
+ className="w-4 h-4 text-blue-600 rounded focus:ring-2 focus:ring-blue-500 disabled:cursor-not-allowed"
936
+ />
937
+ <FileText className={`w-4 h-4 flex-shrink-0 ${tooLarge ? 'text-red-500' : 'text-blue-600'}`} />
938
+ <div className="flex-1 min-w-0">
939
+ <span className="text-sm text-gray-700 truncate block font-medium">
940
+ {doc.filename}
941
+ </span>
942
+ {tooLarge && (
943
+ <span className="text-xs text-red-600 flex items-center gap-1 mt-1">
944
+ <AlertTriangle className="w-3 h-3" />
945
+ Too large ({tokens.toLocaleString()} tokens, max: {maxTokens.toLocaleString()})
946
+ </span>
947
+ )}
948
+ </div>
949
+ </label>
950
+ );
951
+ })}
952
  </div>
953
  </div>
954
  )}
955
+
956
+ {mode === 'summarize' && tokenCounts && selectedDocIds.length > 0 && (
957
+ <div className="bg-blue-50 border border-blue-200 rounded-xl p-4">
958
+ <div className="flex items-center justify-between mb-2">
959
+ <span className="text-sm font-medium text-gray-700">Token Usage</span>
960
+ <span className="text-sm font-bold text-blue-600">
961
+ {selectedTokens.toLocaleString()} / {maxTokens.toLocaleString()}
962
+ </span>
963
+ </div>
964
+ <div className="w-full bg-gray-200 rounded-full h-2">
965
+ <div
966
+ className={`h-2 rounded-full transition-all ${
967
+ tokenPercentage > 90 ? 'bg-red-500' : tokenPercentage > 70 ? 'bg-yellow-500' : 'bg-blue-600'
968
+ }`}
969
+ style={{ width: `${Math.min(tokenPercentage, 100)}%` }}
970
+ />
971
+ </div>
972
+ {tokenPercentage > 90 && (
973
+ <p className="text-xs text-red-600 mt-2 flex items-center gap-1">
974
+ <AlertTriangle className="w-3 h-3" />
975
+ Warning: Approaching token limit. Consider deselecting some documents.
976
+ </p>
977
+ )}
978
+ </div>
979
+ )}
980
+
981
  <button
982
  onClick={handleSummarize}
983
  disabled={loading || !documentsUploaded || selectedDocIds.length === 0}