riteshraut commited on
Commit
ba63231
·
1 Parent(s): 46af083

fix/some bugs

Browse files
app.py CHANGED
@@ -1,5 +1,3 @@
1
- # app.py
2
-
3
  import os
4
  import time
5
  import uuid
@@ -11,7 +9,6 @@ import fitz
11
  import re
12
  import io
13
  from gtts import gTTS
14
-
15
  from langchain_core.documents import Document
16
  from langchain_community.document_loaders import (
17
  TextLoader,
@@ -26,12 +23,8 @@ from langchain_community.retrievers import BM25Retriever
26
  from langchain_community.chat_message_histories import ChatMessageHistory
27
  from langchain.storage import InMemoryStore
28
  from sentence_transformers.cross_encoder import CrossEncoder
29
-
30
-
31
  app = Flask(__name__)
32
  app.config['SECRET_KEY'] = os.urandom(24)
33
-
34
-
35
  class LocalReranker(BaseDocumentCompressor):
36
  model: Any
37
  top_n: int = 3
@@ -59,7 +52,7 @@ class LocalReranker(BaseDocumentCompressor):
59
  doc.metadata['rerank_score'] = float(score)
60
  top_docs.append(doc)
61
  return top_docs
62
-
63
  is_hf_spaces = bool(os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"))
64
  if is_hf_spaces:
65
  app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
@@ -68,12 +61,12 @@ else:
68
 
69
  try:
70
  os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
71
- print(f"✓ Upload folder ready: {app.config['UPLOAD_FOLDER']}")
72
  except Exception as e:
73
- print(f"✗ Failed to create upload folder {app.config['UPLOAD_FOLDER']}: {e}")
74
  app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
75
  os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
76
- print(f"✓ Using fallback upload folder: {app.config['UPLOAD_FOLDER']}")
77
 
78
  rag_chains = {}
79
  message_histories = {}
@@ -84,17 +77,17 @@ try:
84
  model_name="sentence-transformers/all-MiniLM-L6-v2",
85
  model_kwargs={'device': 'cpu'}
86
  )
87
- print("✓ Embedding model loaded successfully.")
88
  except Exception as e:
89
- print(f"✗ FATAL: Could not load embedding model. Error: {e}")
90
  raise e
91
 
92
  print("Loading local re-ranking model...")
93
  try:
94
  RERANKER_MODEL = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", device='cpu')
95
- print("✓ Re-ranking model loaded successfully.")
96
  except Exception as e:
97
- print(f"✗ FATAL: Could not load reranker model. Error: {e}")
98
  raise e
99
 
100
  def load_pdf_with_fallback(filepath):
@@ -112,12 +105,12 @@ def load_pdf_with_fallback(filepath):
112
  }
113
  ))
114
  if docs:
115
- print(f"✓ Successfully loaded PDF with PyMuPDF: {filepath}")
116
  return docs
117
  else:
118
  raise ValueError("No text content found in PDF.")
119
  except Exception as e:
120
- print(f"✗ PyMuPDF failed for {filepath}: {e}")
121
  raise
122
 
123
  LOADER_MAPPING = {
@@ -200,7 +193,7 @@ def upload_files():
200
 
201
  store.mset(list(zip(doc_ids, parent_docs)))
202
  vectorstore.add_documents(child_docs)
203
- print(f"✓ Stored {len(parent_docs)} parent docs and indexed {len(child_docs)} child docs.")
204
 
205
  bm25_retriever = BM25Retriever.from_documents(child_docs)
206
  bm25_retriever.k = 8
@@ -211,7 +204,7 @@ def upload_files():
211
  retrievers=[bm25_retriever, faiss_retriever],
212
  weights=[0.4, 0.6]
213
  )
214
- print("✓ Created Hybrid Retriever for child documents.")
215
 
216
  reranker = LocalReranker(model=RERANKER_MODEL, top_n=4)
217
 
@@ -225,7 +218,7 @@ def upload_files():
225
 
226
  final_retriever = compression_retriever | get_parents
227
 
228
- print("✓ Final retriever chain created: (Hybrid -> Rerank) -> Parent Fetch")
229
 
230
  session_id = str(uuid.uuid4())
231
  rag_chain = create_rag_chain(final_retriever, get_session_history)
@@ -259,12 +252,7 @@ def chat():
259
  try:
260
  rag_chain = rag_chains[session_id]
261
  config = {"configurable": {"session_id": session_id}}
262
-
263
- # Invoke the chain, which will return a string
264
  answer_string = rag_chain.invoke({"question": question}, config=config)
265
-
266
- # --- THIS IS THE FIX ---
267
- # Directly use the returned string in the JSON response.
268
  return jsonify({'answer': answer_string})
269
 
270
  except Exception as e:
 
 
 
1
  import os
2
  import time
3
  import uuid
 
9
  import re
10
  import io
11
  from gtts import gTTS
 
12
  from langchain_core.documents import Document
13
  from langchain_community.document_loaders import (
14
  TextLoader,
 
23
  from langchain_community.chat_message_histories import ChatMessageHistory
24
  from langchain.storage import InMemoryStore
25
  from sentence_transformers.cross_encoder import CrossEncoder
 
 
26
  app = Flask(__name__)
27
  app.config['SECRET_KEY'] = os.urandom(24)
 
 
28
  class LocalReranker(BaseDocumentCompressor):
29
  model: Any
30
  top_n: int = 3
 
52
  doc.metadata['rerank_score'] = float(score)
53
  top_docs.append(doc)
54
  return top_docs
55
+
56
  is_hf_spaces = bool(os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"))
57
  if is_hf_spaces:
58
  app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
 
61
 
62
  try:
63
  os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
64
+ print(f"Upload folder ready: {app.config['UPLOAD_FOLDER']}")
65
  except Exception as e:
66
+ print(f"Failed to create upload folder {app.config['UPLOAD_FOLDER']}: {e}")
67
  app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
68
  os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
69
+ print(f"Using fallback upload folder: {app.config['UPLOAD_FOLDER']}")
70
 
71
  rag_chains = {}
72
  message_histories = {}
 
77
  model_name="sentence-transformers/all-MiniLM-L6-v2",
78
  model_kwargs={'device': 'cpu'}
79
  )
80
+ print("Embedding model loaded successfully.")
81
  except Exception as e:
82
+ print(f"FATAL: Could not load embedding model. Error: {e}")
83
  raise e
84
 
85
  print("Loading local re-ranking model...")
86
  try:
87
  RERANKER_MODEL = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", device='cpu')
88
+ print("Re-ranking model loaded successfully.")
89
  except Exception as e:
90
+ print(f"FATAL: Could not load reranker model. Error: {e}")
91
  raise e
92
 
93
  def load_pdf_with_fallback(filepath):
 
105
  }
106
  ))
107
  if docs:
108
+ print(f"Successfully loaded PDF with PyMuPDF: {filepath}")
109
  return docs
110
  else:
111
  raise ValueError("No text content found in PDF.")
112
  except Exception as e:
113
+ print(f"PyMuPDF failed for {filepath}: {e}")
114
  raise
115
 
116
  LOADER_MAPPING = {
 
193
 
194
  store.mset(list(zip(doc_ids, parent_docs)))
195
  vectorstore.add_documents(child_docs)
196
+ print(f"Stored {len(parent_docs)} parent docs and indexed {len(child_docs)} child docs.")
197
 
198
  bm25_retriever = BM25Retriever.from_documents(child_docs)
199
  bm25_retriever.k = 8
 
204
  retrievers=[bm25_retriever, faiss_retriever],
205
  weights=[0.4, 0.6]
206
  )
207
+ print("Created Hybrid Retriever for child documents.")
208
 
209
  reranker = LocalReranker(model=RERANKER_MODEL, top_n=4)
210
 
 
218
 
219
  final_retriever = compression_retriever | get_parents
220
 
221
+ print("Final retriever chain created: (Hybrid -> Rerank) -> Parent Fetch")
222
 
223
  session_id = str(uuid.uuid4())
224
  rag_chain = create_rag_chain(final_retriever, get_session_history)
 
252
  try:
253
  rag_chain = rag_chains[session_id]
254
  config = {"configurable": {"session_id": session_id}}
 
 
255
  answer_string = rag_chain.invoke({"question": question}, config=config)
 
 
 
256
  return jsonify({'answer': answer_string})
257
 
258
  except Exception as e:
rag_processor.py CHANGED
@@ -1,16 +1,8 @@
1
- # rag_processor.py
2
-
3
  import os
4
  from dotenv import load_dotenv
5
- from operator import itemgetter # <--- ADD THIS IMPORT
6
-
7
- # LLM
8
  from langchain_groq import ChatGroq
9
-
10
- # Prompting
11
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
12
-
13
- # Chains
14
  from langchain_core.runnables import RunnableParallel, RunnablePassthrough
15
  from langchain_core.output_parsers import StrOutputParser
16
  from langchain_core.runnables.history import RunnableWithMessageHistory
@@ -30,18 +22,12 @@ def create_rag_chain(retriever, get_session_history_func):
30
  Raises:
31
  ValueError: If the GROQ_API_KEY is missing.
32
  """
33
- # Load environment variables from .env file (for local development)
34
  load_dotenv()
35
-
36
- # Get API key from environment (works for both HF Spaces secrets and local .env)
37
  api_key = os.getenv("GROQ_API_KEY")
38
  print("key loaded")
39
-
40
  if not api_key or api_key == "your_groq_api_key_here":
41
  error_msg = "GROQ_API_KEY not found or not configured properly.\n"
42
  print("Not found key")
43
-
44
- # Detect if running in Hugging Face Spaces
45
  if os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"):
46
  error_msg += (
47
  "For Hugging Face Spaces: Set GROQ_API_KEY in your Space's Settings > Repository Secrets.\n"
@@ -55,12 +41,8 @@ def create_rag_chain(retriever, get_session_history_func):
55
  )
56
 
57
  raise ValueError(error_msg)
58
-
59
- # --- 1. Initialize the LLM ---
60
- # Updated model_name to a standard, high-performance Groq model
61
  llm = ChatGroq(model_name="llama-3.1-8b-instant", api_key=api_key, temperature=0.1)
62
 
63
- # --- 2. Create Query Rewriting Chain 🧠 ---
64
  print("\nSetting up query rewriting chain...")
65
  rewrite_template = """You are an expert at rewriting user questions for a vector database.
66
  You are here to help the user with their document.
@@ -79,8 +61,6 @@ Standalone Question:"""
79
  ("human", "Based on our conversation, reformulate this question to be a standalone query: {question}")
80
  ])
81
  query_rewriter = rewrite_prompt | llm | StrOutputParser()
82
-
83
- # --- 3. Create Main RAG Chain with Memory ---
84
  print("\nSetting up main RAG chain...")
85
  rag_template = """You are CogniChat, an expert document analysis assistant.
86
  IMPORTANT RULES:
@@ -95,35 +75,24 @@ Context:
95
  MessagesPlaceholder(variable_name="chat_history"),
96
  ("human", "{question}"),
97
  ])
98
-
99
- # ============================ FIX IS HERE ============================
100
- # Parallel process to fetch context and correctly pass through question and history.
101
- # We use itemgetter to select the specific keys from the input dictionary.
102
  setup_and_retrieval = RunnableParallel({
103
  "context": RunnablePassthrough.assign(
104
  rewritten_question=query_rewriter
105
  ) | (lambda x: x["rewritten_question"]) | retriever,
106
  "question": itemgetter("question"),
107
  "chat_history": itemgetter("chat_history")})
108
- # =====================================================================
109
-
110
- # The initial RAG chain
111
  conversational_rag_chain = (
112
  setup_and_retrieval
113
  | rag_prompt
114
  | llm
115
  | StrOutputParser()
116
  )
117
-
118
- # Wrap the chain with memory management
119
  chain_with_memory = RunnableWithMessageHistory(
120
  conversational_rag_chain,
121
  get_session_history_func,
122
  input_messages_key="question",
123
  history_messages_key="chat_history",
124
  )
125
-
126
- # --- 4. Create Answer Refinement Chain ✨ ---
127
  print("\nSetting up answer refinement chain...")
128
  refine_template = """You are an expert at editing and refining content.
129
  Your task is to take a given answer and improve its clarity, structure, and readability.
@@ -137,9 +106,6 @@ Refined Answer:"""
137
  refine_prompt = ChatPromptTemplate.from_template(refine_template)
138
  refinement_chain = refine_prompt | llm | StrOutputParser()
139
 
140
- # --- 5. Combine Everything into the Final Chain ---
141
- # The final chain passes the output of the memory-enabled chain to the refinement chain
142
- # Note: We need to adapt the input for the refinement chain
143
  final_chain = (
144
  lambda input_dict: {"answer": chain_with_memory.invoke(input_dict, config=input_dict.get('config'))}
145
  ) | refinement_chain
 
 
 
1
  import os
2
  from dotenv import load_dotenv
3
+ from operator import itemgetter
 
 
4
  from langchain_groq import ChatGroq
 
 
5
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 
 
6
  from langchain_core.runnables import RunnableParallel, RunnablePassthrough
7
  from langchain_core.output_parsers import StrOutputParser
8
  from langchain_core.runnables.history import RunnableWithMessageHistory
 
22
  Raises:
23
  ValueError: If the GROQ_API_KEY is missing.
24
  """
 
25
  load_dotenv()
 
 
26
  api_key = os.getenv("GROQ_API_KEY")
27
  print("key loaded")
 
28
  if not api_key or api_key == "your_groq_api_key_here":
29
  error_msg = "GROQ_API_KEY not found or not configured properly.\n"
30
  print("Not found key")
 
 
31
  if os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"):
32
  error_msg += (
33
  "For Hugging Face Spaces: Set GROQ_API_KEY in your Space's Settings > Repository Secrets.\n"
 
41
  )
42
 
43
  raise ValueError(error_msg)
 
 
 
44
  llm = ChatGroq(model_name="llama-3.1-8b-instant", api_key=api_key, temperature=0.1)
45
 
 
46
  print("\nSetting up query rewriting chain...")
47
  rewrite_template = """You are an expert at rewriting user questions for a vector database.
48
  You are here to help the user with their document.
 
61
  ("human", "Based on our conversation, reformulate this question to be a standalone query: {question}")
62
  ])
63
  query_rewriter = rewrite_prompt | llm | StrOutputParser()
 
 
64
  print("\nSetting up main RAG chain...")
65
  rag_template = """You are CogniChat, an expert document analysis assistant.
66
  IMPORTANT RULES:
 
75
  MessagesPlaceholder(variable_name="chat_history"),
76
  ("human", "{question}"),
77
  ])
 
 
 
 
78
  setup_and_retrieval = RunnableParallel({
79
  "context": RunnablePassthrough.assign(
80
  rewritten_question=query_rewriter
81
  ) | (lambda x: x["rewritten_question"]) | retriever,
82
  "question": itemgetter("question"),
83
  "chat_history": itemgetter("chat_history")})
 
 
 
84
  conversational_rag_chain = (
85
  setup_and_retrieval
86
  | rag_prompt
87
  | llm
88
  | StrOutputParser()
89
  )
 
 
90
  chain_with_memory = RunnableWithMessageHistory(
91
  conversational_rag_chain,
92
  get_session_history_func,
93
  input_messages_key="question",
94
  history_messages_key="chat_history",
95
  )
 
 
96
  print("\nSetting up answer refinement chain...")
97
  refine_template = """You are an expert at editing and refining content.
98
  Your task is to take a given answer and improve its clarity, structure, and readability.
 
106
  refine_prompt = ChatPromptTemplate.from_template(refine_template)
107
  refinement_chain = refine_prompt | llm | StrOutputParser()
108
 
 
 
 
109
  final_chain = (
110
  lambda input_dict: {"answer": chain_with_memory.invoke(input_dict, config=input_dict.get('config'))}
111
  ) | refinement_chain
test_dependencies.py CHANGED
@@ -1,11 +1,7 @@
1
- #!/usr/bin/env python3
2
- """
3
- Test script to verify all dependencies and PDF handling capabilities.
4
- """
5
  import os
6
  import sys
7
 
8
- print("=== CogniChat Dependencies & PDF Handling Test ===")
9
 
10
  # Test imports
11
  try:
@@ -14,62 +10,50 @@ try:
14
  from langchain_community.retrievers import BM25Retriever
15
  from langchain.text_splitter import RecursiveCharacterTextSplitter
16
  from langchain_core.documents import Document
17
- print("✓ Core LangChain imports successful!")
18
 
19
- print(f"✗ Import error: {e}")
21
  if "rank_bm25" in str(e):
22
- print(" → Missing dependency: pip install rank-bm25==0.2.2")
23
  sys.exit(1)
24
-
25
- # Test PDF loading capabilities
26
  try:
27
  print("\nTesting PDF loading capabilities...")
28
-
29
- # Test PyPDF availability
30
  try:
31
  from langchain_community.document_loaders import PyPDFLoader
32
- print("✓ PyPDFLoader available")
33
  except ImportError:
34
- print("✗ PyPDFLoader not available")
35
-
36
- # Test PyMuPDF availability
37
  try:
38
  import fitz
39
- print("✓ PyMuPDF (fitz) available - can handle corrupted PDFs")
40
  except ImportError:
41
- print("✗ PyMuPDF (fitz) not available")
42
 
43
- # Test pdfplumber availability
44
  try:
45
  import pdfplumber
46
- print("✓ pdfplumber available - additional PDF parsing method")
47
  except ImportError:
48
- print("✗ pdfplumber not available")
49
 
50
  except Exception as e:
51
- print(f"✗ Error testing PDF capabilities: {e}")
52
-
53
- # Test BM25 Retriever
54
  try:
55
  print("\nTesting BM25 Retriever...")
56
 
57
- # Create some test documents
58
  test_docs = [
59
  Document(page_content="This is the first test document about machine learning."),
60
  Document(page_content="This is the second document discussing natural language processing."),
61
  Document(page_content="The third document covers artificial intelligence topics."),
62
  ]
63
 
64
- # Create BM25 retriever
65
  bm25_retriever = BM25Retriever.from_documents(test_docs)
66
  bm25_retriever.k = 2
67
-
68
- # Test retrieval
69
  query = "machine learning"
70
  results = bm25_retriever.get_relevant_documents(query)
71
- print(f"✓ BM25 retriever created and tested successfully!")
72
- print(f"✓ Retrieved {len(results)} documents for query: '{query}'")
73
 
74
  except Exception as e:
75
  print(f"✗ Error testing BM25 retriever: {e}")
@@ -77,7 +61,7 @@ except Exception as e:
77
  traceback.print_exc()
78
  sys.exit(1)
79
 
80
- print("\n=== All tests completed successfully! ===")
81
  print("\nThe application should now handle:")
82
  print(" • Regular file uploads and processing")
83
  print(" • Corrupted PDF files with multiple fallback methods")
@@ -86,8 +70,7 @@ print(" β€’ Proper error messages for failed file processing")
86
  print("\nMake sure to install all dependencies with:")
87
  print(" pip install -r requirements.txt")
88
 
89
- # Display dependency summary
90
- print("\n=== Key Dependencies Added/Updated ===")
91
  print(" • rank-bm25==0.2.2 (for BM25 retrieval)")
92
  print(" • pymupdf==1.23.26 (PDF fallback method)")
93
  print(" • pdfplumber==0.10.3 (additional PDF parsing)")
 
 
 
 
 
1
  import os
2
  import sys
3
 
4
+ print("CogniChat Dependencies & PDF Handling Test")
5
 
6
  # Test imports
7
  try:
 
10
  from langchain_community.retrievers import BM25Retriever
11
  from langchain.text_splitter import RecursiveCharacterTextSplitter
12
  from langchain_core.documents import Document
13
+ print("Core LangChain imports successful!")
14
 
15
  except ImportError as e:
16
+ print(f"Import error: {e}")
17
  if "rank_bm25" in str(e):
18
+ print("Missing dependency: pip install rank-bm25==0.2.2")
19
  sys.exit(1)
 
 
20
  try:
21
  print("\nTesting PDF loading capabilities...")
 
 
22
  try:
23
  from langchain_community.document_loaders import PyPDFLoader
24
+ print("PyPDFLoader available")
25
  except ImportError:
26
+ print("PyPDFLoader not available")
27
+
 
28
  try:
29
  import fitz
30
+ print("PyMuPDF (fitz) available - can handle corrupted PDFs")
31
  except ImportError:
32
+ print("PyMuPDF (fitz) not available")
33
 
 
34
  try:
35
  import pdfplumber
36
+ print("pdfplumber available - additional PDF parsing method")
37
  except ImportError:
38
+ print("pdfplumber not available")
39
 
40
  except Exception as e:
41
+ print(f"Error testing PDF capabilities: {e}")
 
 
42
  try:
43
  print("\nTesting BM25 Retriever...")
44
 
 
45
  test_docs = [
46
  Document(page_content="This is the first test document about machine learning."),
47
  Document(page_content="This is the second document discussing natural language processing."),
48
  Document(page_content="The third document covers artificial intelligence topics."),
49
  ]
50
 
 
51
  bm25_retriever = BM25Retriever.from_documents(test_docs)
52
  bm25_retriever.k = 2
 
 
53
  query = "machine learning"
54
  results = bm25_retriever.get_relevant_documents(query)
55
+ print(f"BM25 retriever created and tested successfully!")
56
+ print(f"Retrieved {len(results)} documents for query: '{query}'")
57
 
58
  except Exception as e:
59
  print(f"✗ Error testing BM25 retriever: {e}")
 
61
  traceback.print_exc()
62
  sys.exit(1)
63
 
64
+ print("\nAll tests completed successfully!")
65
  print("\nThe application should now handle:")
66
  print(" • Regular file uploads and processing")
67
  print(" • Corrupted PDF files with multiple fallback methods")
 
70
  print("\nMake sure to install all dependencies with:")
71
  print(" pip install -r requirements.txt")
72
 
73
+ print("\nKey Dependencies Added/Updated")
 
74
  print(" • rank-bm25==0.2.2 (for BM25 retrieval)")
75
  print(" • pymupdf==1.23.26 (PDF fallback method)")
76
  print(" • pdfplumber==0.10.3 (additional PDF parsing)")
test_hf_spaces_session.py CHANGED
@@ -1,18 +1,11 @@
1
- #!/usr/bin/env python3
2
- """
3
- Quick test to diagnose the current HF Spaces chat issue.
4
- Run this after uploading a document to test the session state.
5
- """
6
  import requests
7
  import json
8
-
9
- # Replace with your actual Space URL
10
- BASE_URL = "https://hyperxd-0-cognichat.hf.space" # Update this to your actual Space URL
11
 
12
  def test_endpoints():
13
  """Test the debug and session endpoints to understand the issue."""
14
 
15
- print("=== CogniChat HF Spaces Diagnostic ===\n")
16
 
17
  # Test 1: Check debug endpoint
18
  print("1. Testing /debug endpoint...")
@@ -20,7 +13,7 @@ def test_endpoints():
20
  response = requests.get(f"{BASE_URL}/debug")
21
  if response.status_code == 200:
22
  data = response.json()
23
- print("✓ Debug endpoint working")
24
  print(f" Environment: {data.get('environment')}")
25
  print(f" GROQ API Key: {'Set' if data.get('groq_api_key_set') else 'NOT SET'}")
26
  print(f" Sessions count: {data.get('sessions_count')}")
@@ -29,44 +22,39 @@ def test_endpoints():
29
  print(f" Flask session ID: {data.get('flask_session_id')}")
30
  print(f" Session keys: {data.get('flask_session_keys')}")
31
  else:
32
- print(f"✗ Debug endpoint failed: {response.status_code}")
33
  except Exception as e:
34
- print(f"✗ Error accessing debug endpoint: {e}")
35
 
36
  print()
37
 
38
- # Test 2: Check session handling
39
  print("2. Testing /test-session endpoint...")
40
  try:
41
- # Create a session with cookies
42
  session = requests.Session()
43
-
44
- # Test session write
45
  response = session.post(f"{BASE_URL}/test-session")
46
  if response.status_code == 200:
47
  data = response.json()
48
- print("✓ Session write working")
49
- print(f" Test key: {data.get('test_key')}")
50
- print(f" Session keys: {data.get('session_keys')}")
51
  else:
52
- print(f"✗ Session write failed: {response.status_code}")
53
 
54
- # Test session read
55
  response = session.get(f"{BASE_URL}/test-session")
56
  if response.status_code == 200:
57
  data = response.json()
58
- print("✓ Session read working")
59
  print(f" Test key persisted: {data.get('test_key')}")
60
  print(f" Has session data: {data.get('has_session_data')}")
61
 
62
  if not data.get('test_key'):
63
- print("⚠️ WARNING: Sessions are not persisting between requests!")
64
- print(" This is likely the cause of the 400 chat error.")
65
  else:
66
- print(f"✗ Session read failed: {response.status_code}")
67
 
68
  except Exception as e:
69
- print(f"✗ Error testing sessions: {e}")
70
 
71
  print()
72
 
@@ -78,16 +66,16 @@ def test_endpoints():
78
  data = response.json()
79
  session_ids = data.get('session_ids', [])
80
  if session_ids:
81
- print(f"✓ Found {len(session_ids)} existing RAG sessions")
82
- print(f" Session IDs: {session_ids[:3]}{'...' if len(session_ids) > 3 else ''}")
83
  else:
84
- print("ℹ️ No RAG sessions found (normal if no documents were uploaded)")
85
 
86
  except Exception as e:
87
- print(f"✗ Error checking RAG sessions: {e}")
88
 
89
  print()
90
- print("=== Diagnosis Complete ===")
91
  print()
92
  print("LIKELY ISSUE:")
93
  print("If sessions are not persisting, this is a common issue in HF Spaces")
@@ -104,7 +92,4 @@ if __name__ == "__main__":
104
  print("3. Optionally upload a document first")
105
  print()
106
 
107
- # Uncomment the next line and update the URL to run the test
108
- # test_endpoints()
109
-
110
  print("Update the BASE_URL variable above and uncomment the test_endpoints() call")
 
 
 
 
 
 
1
  import requests
2
  import json
3
+ BASE_URL = "https://huggingface.co/spaces/Zeri00/Cogni-chat-document-reader"
 
 
4
 
5
  def test_endpoints():
6
  """Test the debug and session endpoints to understand the issue."""
7
 
8
+ print("CogniChat HF Spaces Diagnostic\n")
9
 
10
  # Test 1: Check debug endpoint
11
  print("1. Testing /debug endpoint...")
 
13
  response = requests.get(f"{BASE_URL}/debug")
14
  if response.status_code == 200:
15
  data = response.json()
16
+ print(" Debug endpoint working")
17
  print(f" Environment: {data.get('environment')}")
18
  print(f" GROQ API Key: {'Set' if data.get('groq_api_key_set') else 'NOT SET'}")
19
  print(f" Sessions count: {data.get('sessions_count')}")
 
22
  print(f" Flask session ID: {data.get('flask_session_id')}")
23
  print(f" Session keys: {data.get('flask_session_keys')}")
24
  else:
25
+ print(f"Debug endpoint failed: {response.status_code}")
26
  except Exception as e:
27
+ print(f"Error accessing debug endpoint: {e}")
28
 
29
  print()
30
 
 
31
  print("2. Testing /test-session endpoint...")
32
  try:
 
33
  session = requests.Session()
 
 
34
  response = session.post(f"{BASE_URL}/test-session")
35
  if response.status_code == 200:
36
  data = response.json()
37
+ print("Session write working")
38
+ print(f"Test key: {data.get('test_key')}")
39
+ print(f"Session keys: {data.get('session_keys')}")
40
  else:
41
+ print(f"Session write failed: {response.status_code}")
42
 
 
43
  response = session.get(f"{BASE_URL}/test-session")
44
  if response.status_code == 200:
45
  data = response.json()
46
+ print("Session read working")
47
  print(f" Test key persisted: {data.get('test_key')}")
48
  print(f" Has session data: {data.get('has_session_data')}")
49
 
50
  if not data.get('test_key'):
51
+ print("WARNING: Sessions are not persisting between requests!")
52
+ print(" This is likely the cause of the 400 chat error.")
53
  else:
54
+ print(f"Session read failed: {response.status_code}")
55
 
56
  except Exception as e:
57
+ print(f"Error testing sessions: {e}")
58
 
59
  print()
60
 
 
66
  data = response.json()
67
  session_ids = data.get('session_ids', [])
68
  if session_ids:
69
+ print(f"Found {len(session_ids)} existing RAG sessions")
70
+ print(f" Session IDs: {session_ids[:3]}{'...' if len(session_ids) > 3 else ''}")
71
  else:
72
+ print("No RAG sessions found (normal if no documents were uploaded)")
73
 
74
  except Exception as e:
75
+ print(f"Error checking RAG sessions: {e}")
76
 
77
  print()
78
+ print("Diagnosis Complete")
79
  print()
80
  print("LIKELY ISSUE:")
81
  print("If sessions are not persisting, this is a common issue in HF Spaces")
 
92
  print("3. Optionally upload a document first")
93
  print()
94
 
 
 
 
95
  print("Update the BASE_URL variable above and uncomment the test_endpoints() call")
test_upload_permissions.py CHANGED
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env python3
2
  """
3
  Test script to verify upload folder permissions and file operations.
4
  """
@@ -6,7 +6,7 @@ import os
6
  import tempfile
7
  from pathlib import Path
8
 
9
- print("=== Upload Folder Permission Test ===")
10
 
11
  # Detect environment
12
  is_hf_spaces = bool(os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"))
@@ -20,7 +20,7 @@ test_folders = [
20
  '/tmp/cognichat_uploads' # Alternative temp location
21
  ]
22
 
23
- print("\n=== Testing Upload Folder Options ===")
24
 
25
  for folder in test_folders:
26
  print(f"\nTesting: {folder}")
@@ -28,35 +28,35 @@ for folder in test_folders:
28
  try:
29
  # Try to create the directory
30
  os.makedirs(folder, exist_ok=True)
31
- print(f" ✓ Directory created/exists")
32
 
33
  # Test write permissions
34
  test_file = os.path.join(folder, 'test_write.txt')
35
  with open(test_file, 'w') as f:
36
  f.write('test content')
37
- print(f" ✓ Write permission verified")
38
 
39
  # Test read permissions
40
  with open(test_file, 'r') as f:
41
  content = f.read()
42
- print(f" ✓ Read permission verified")
43
 
44
  # Clean up test file
45
  os.remove(test_file)
46
- print(f" ✓ File deletion works")
47
 
48
  # Get absolute path
49
  abs_path = os.path.abspath(folder)
50
- print(f" → Full path: {abs_path}")
51
- print(f" → Writable: {os.access(folder, os.W_OK)}")
52
 
53
  except PermissionError as e:
54
- print(f" ✗ Permission denied: {e}")
55
  except Exception as e:
56
- print(f" ✗ Error: {e}")
57
 
58
  # Recommended configuration
59
- print(f"\n=== Recommended Configuration ===")
60
  if is_hf_spaces:
61
  recommended_folder = '/tmp/uploads'
62
  print(f"For Hugging Face Spaces: {recommended_folder}")
@@ -68,17 +68,15 @@ print(f"\nUse this in your Flask app:")
68
  print(f"app.config['UPLOAD_FOLDER'] = '{recommended_folder}'")
69
 
70
  # Test the current working directory permissions
71
- print(f"\n=== Current Directory Info ===")
72
  cwd = os.getcwd()
73
  print(f"Current working directory: {cwd}")
74
  print(f"CWD is writable: {os.access(cwd, os.W_OK)}")
75
-
76
- # Show environment variables related to paths
77
- print(f"\n=== Path Environment Variables ===")
78
  path_vars = ['HOME', 'TMPDIR', 'TEMP', 'TMP', 'SPACE_ID', 'SPACES_ZERO_GPU']
79
  for var in path_vars:
80
  value = os.getenv(var)
81
  if value:
82
  print(f"{var}: {value}")
83
 
84
- print(f"\n=== Test Complete ===")
 
1
+
2
  """
3
  Test script to verify upload folder permissions and file operations.
4
  """
 
6
  import tempfile
7
  from pathlib import Path
8
 
9
+ print("Upload Folder Permission Test")
10
 
11
  # Detect environment
12
  is_hf_spaces = bool(os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"))
 
20
  '/tmp/cognichat_uploads' # Alternative temp location
21
  ]
22
 
23
+ print("\nTesting Upload Folder Options")
24
 
25
  for folder in test_folders:
26
  print(f"\nTesting: {folder}")
 
28
  try:
29
  # Try to create the directory
30
  os.makedirs(folder, exist_ok=True)
31
+ print(f"Directory created/exists")
32
 
33
  # Test write permissions
34
  test_file = os.path.join(folder, 'test_write.txt')
35
  with open(test_file, 'w') as f:
36
  f.write('test content')
37
+ print(f"Write permission verified")
38
 
39
  # Test read permissions
40
  with open(test_file, 'r') as f:
41
  content = f.read()
42
+ print(f"Read permission verified")
43
 
44
  # Clean up test file
45
  os.remove(test_file)
46
+ print(f" File deletion works")
47
 
48
  # Get absolute path
49
  abs_path = os.path.abspath(folder)
50
+ print(f"Full path: {abs_path}")
51
+ print(f"Writable: {os.access(folder, os.W_OK)}")
52
 
53
  except PermissionError as e:
54
+ print(f"Permission denied: {e}")
55
  except Exception as e:
56
+ print(f"Error: {e}")
57
 
58
  # Recommended configuration
59
+ print(f"\nRecommended Configuration")
60
  if is_hf_spaces:
61
  recommended_folder = '/tmp/uploads'
62
  print(f"For Hugging Face Spaces: {recommended_folder}")
 
68
  print(f"app.config['UPLOAD_FOLDER'] = '{recommended_folder}'")
69
 
70
  # Test the current working directory permissions
71
+ print(f"\nCurrent Directory Info")
72
  cwd = os.getcwd()
73
  print(f"Current working directory: {cwd}")
74
  print(f"CWD is writable: {os.access(cwd, os.W_OK)}")
75
+ print(f"\nPath Environment Variables")
 
 
76
  path_vars = ['HOME', 'TMPDIR', 'TEMP', 'TMP', 'SPACE_ID', 'SPACES_ZERO_GPU']
77
  for var in path_vars:
78
  value = os.getenv(var)
79
  if value:
80
  print(f"{var}: {value}")
81
 
82
+ print(f"\nTest Complete")
verify_hf_spaces_ready.py CHANGED
@@ -16,7 +16,7 @@ def print_header(text):
16
 
17
  def print_check(condition, message):
18
  """Print a check result."""
19
- status = "✅ PASS" if condition else "❌ FAIL"
20
  print(f"{status}: {message}")
21
  return condition
22
 
@@ -247,8 +247,8 @@ def main():
247
  print("5. Wait for build to complete")
248
  return 0
249
  else:
250
- print(f"\n❌ SOME CHECKS FAILED ({total - passed}/{total} issues)")
251
- print("\n⚠️ Please fix the issues above before deploying.")
252
  print("\nFor detailed guidance, see:")
253
  print("- HF_SPACES_FILE_STORAGE_GUIDE.md")
254
  print("- DEPLOYMENT.md")
 
16
 
17
  def print_check(condition, message):
18
  """Print a check result."""
19
+ status = "PASS" if condition else "FAIL"
20
  print(f"{status}: {message}")
21
  return condition
22
 
 
247
  print("5. Wait for build to complete")
248
  return 0
249
  else:
250
+ print(f"\n SOME CHECKS FAILED ({total - passed}/{total} issues)")
251
+ print("\n Please fix the issues above before deploying.")
252
  print("\nFor detailed guidance, see:")
253
  print("- HF_SPACES_FILE_STORAGE_GUIDE.md")
254
  print("- DEPLOYMENT.md")