bhavinmatariya commited on
Commit
113f6a0
·
1 Parent(s): e00c5eb

Remove unwanted logs and print statements

Browse files
api/api.py CHANGED
@@ -207,7 +207,7 @@ async def get_model_config():
207
  ModelConfig: A configuration object containing providers and their models
208
  """
209
  try:
210
- logger.info("Fetching model configurations")
211
 
212
  # Create providers from the config file
213
  providers = []
@@ -267,7 +267,7 @@ async def export_wiki(request: WikiExportRequest):
267
  A downloadable file in the requested format
268
  """
269
  try:
270
- logger.info(f"Exporting wiki for {request.repo_url} in {request.format} format")
271
 
272
  # Extract repository name from URL for the filename
273
  repo_parts = request.repo_url.rstrip('/').split('/')
@@ -319,7 +319,7 @@ async def get_local_repo_structure(path: str = Query(None, description="Path to
319
  )
320
 
321
  try:
322
- logger.info(f"Processing local repository at: {path}")
323
  file_tree_lines = []
324
  readme_content = ""
325
 
@@ -338,7 +338,7 @@ async def get_local_repo_structure(path: str = Query(None, description="Path to
338
  with open(os.path.join(root, file), 'r', encoding='utf-8') as f:
339
  readme_content = f.read()
340
  except Exception as e:
341
- logger.warning(f"Could not read README.md: {str(e)}")
342
  readme_content = ""
343
 
344
  file_tree_str = '\n'.join(sorted(file_tree_lines))
@@ -459,7 +459,7 @@ async def read_wiki_cache(owner: str, repo: str, repo_type: str, language: str)
459
  async def save_wiki_cache(data: WikiCacheRequest) -> bool:
460
  """Saves wiki cache data to the file system."""
461
  cache_path = get_wiki_cache_path(data.repo.owner, data.repo.repo, data.repo.type, data.language)
462
- logger.info(f"Attempting to save wiki cache. Path: {cache_path}")
463
  try:
464
  payload = WikiCacheData(
465
  wiki_structure=data.wiki_structure,
@@ -468,19 +468,13 @@ async def save_wiki_cache(data: WikiCacheRequest) -> bool:
468
  provider=data.provider,
469
  model=data.model
470
  )
471
- # Log size of data to be cached for debugging (avoid logging full content if large)
472
- try:
473
- payload_json = payload.model_dump_json()
474
- payload_size = len(payload_json.encode('utf-8'))
475
- logger.info(f"Payload prepared for caching. Size: {payload_size} bytes.")
476
- except Exception as ser_e:
477
- logger.warning(f"Could not serialize payload for size logging: {ser_e}")
478
 
479
 
480
- logger.info(f"Writing cache file to: {cache_path}")
481
  with open(cache_path, 'w', encoding='utf-8') as f:
482
  json.dump(payload.model_dump(), f, indent=2)
483
- logger.info(f"Wiki cache successfully saved to {cache_path}")
484
  return True
485
  except IOError as e:
486
  logger.error(f"IOError saving wiki cache to {cache_path}: {e.strerror} (errno: {e.errno})", exc_info=True)
@@ -506,14 +500,14 @@ async def get_cached_wiki(
506
  if not supported_langs.__contains__(language):
507
  language = configs["lang_config"]["default"]
508
 
509
- logger.info(f"Attempting to retrieve wiki cache for {owner}/{repo} ({repo_type}), lang: {language}")
510
  cached_data = await read_wiki_cache(owner, repo, repo_type, language)
511
  if cached_data:
512
  return cached_data
513
  else:
514
  # Return 200 with null body if not found, as frontend expects this behavior
515
  # Or, raise HTTPException(status_code=404, detail="Wiki cache not found") if preferred
516
- logger.info(f"Wiki cache not found for {owner}/{repo} ({repo_type}), lang: {language}")
517
  return None
518
 
519
  @app.post("/api/wiki_cache")
@@ -527,7 +521,7 @@ async def store_wiki_cache(request_data: WikiCacheRequest):
527
  if not supported_langs.__contains__(request_data.language):
528
  request_data.language = configs["lang_config"]["default"]
529
 
530
- logger.info(f"Attempting to save wiki cache for {request_data.repo.owner}/{request_data.repo.repo} ({request_data.repo.type}), lang: {request_data.language}")
531
  success = await save_wiki_cache(request_data)
532
  if success:
533
  return {"message": "Wiki cache saved successfully"}
@@ -551,23 +545,23 @@ async def delete_wiki_cache(
551
  raise HTTPException(status_code=400, detail="Language is not supported")
552
 
553
  if WIKI_AUTH_MODE:
554
- logger.info("check the authorization code")
555
  if WIKI_AUTH_CODE != authorization_code:
556
  raise HTTPException(status_code=401, detail="Authorization code is invalid")
557
 
558
- logger.info(f"Attempting to delete wiki cache for {owner}/{repo} ({repo_type}), lang: {language}")
559
  cache_path = get_wiki_cache_path(owner, repo, repo_type, language)
560
 
561
  if os.path.exists(cache_path):
562
  try:
563
  os.remove(cache_path)
564
- logger.info(f"Successfully deleted wiki cache: {cache_path}")
565
  return {"message": f"Wiki cache for {owner}/{repo} ({language}) deleted successfully"}
566
  except Exception as e:
567
  logger.error(f"Error deleting wiki cache {cache_path}: {e}")
568
  raise HTTPException(status_code=500, detail=f"Failed to delete wiki cache: {str(e)}")
569
  else:
570
- logger.warning(f"Wiki cache not found, cannot delete: {cache_path}")
571
  raise HTTPException(status_code=404, detail="Wiki cache not found")
572
 
573
  @app.get("/health")
@@ -618,10 +612,10 @@ async def get_processed_projects():
618
 
619
  try:
620
  if not os.path.exists(WIKI_CACHE_DIR):
621
- logger.info(f"Cache directory {WIKI_CACHE_DIR} not found. Returning empty list.")
622
  return []
623
 
624
- logger.info(f"Scanning for project cache files in: {WIKI_CACHE_DIR}")
625
  filenames = await asyncio.to_thread(os.listdir, WIKI_CACHE_DIR) # Use asyncio.to_thread for os.listdir
626
 
627
  for filename in filenames:
@@ -652,14 +646,14 @@ async def get_processed_projects():
652
  )
653
  )
654
  else:
655
- logger.warning(f"Could not parse project details from filename: {filename}")
656
  except Exception as e:
657
  logger.error(f"Error processing file {file_path}: {e}")
658
  continue # Skip this file on error
659
 
660
  # Sort by most recent first
661
  project_entries.sort(key=lambda p: p.submittedAt, reverse=True)
662
- logger.info(f"Found {len(project_entries)} processed project entries.")
663
  return project_entries
664
 
665
  except Exception as e:
 
207
  ModelConfig: A configuration object containing providers and their models
208
  """
209
  try:
210
+ # Fetching model configurations
211
 
212
  # Create providers from the config file
213
  providers = []
 
267
  A downloadable file in the requested format
268
  """
269
  try:
270
+ print(f"Exporting wiki for {request.repo_url} in {request.format} format")
271
 
272
  # Extract repository name from URL for the filename
273
  repo_parts = request.repo_url.rstrip('/').split('/')
 
319
  )
320
 
321
  try:
322
+ print(f"Processing local repository at: {path}")
323
  file_tree_lines = []
324
  readme_content = ""
325
 
 
338
  with open(os.path.join(root, file), 'r', encoding='utf-8') as f:
339
  readme_content = f.read()
340
  except Exception as e:
341
+ print(f"Warning: Could not read README.md: {str(e)}")
342
  readme_content = ""
343
 
344
  file_tree_str = '\n'.join(sorted(file_tree_lines))
 
459
  async def save_wiki_cache(data: WikiCacheRequest) -> bool:
460
  """Saves wiki cache data to the file system."""
461
  cache_path = get_wiki_cache_path(data.repo.owner, data.repo.repo, data.repo.type, data.language)
462
+ # Saving to cache
463
  try:
464
  payload = WikiCacheData(
465
  wiki_structure=data.wiki_structure,
 
468
  provider=data.provider,
469
  model=data.model
470
  )
471
+ # Payload prepared for caching
 
 
 
 
 
 
472
 
473
 
474
+ # Writing cache file
475
  with open(cache_path, 'w', encoding='utf-8') as f:
476
  json.dump(payload.model_dump(), f, indent=2)
477
+ print(f"Wiki cache saved to {cache_path}")
478
  return True
479
  except IOError as e:
480
  logger.error(f"IOError saving wiki cache to {cache_path}: {e.strerror} (errno: {e.errno})", exc_info=True)
 
500
  if not supported_langs.__contains__(language):
501
  language = configs["lang_config"]["default"]
502
 
503
+ # Retrieving wiki cache
504
  cached_data = await read_wiki_cache(owner, repo, repo_type, language)
505
  if cached_data:
506
  return cached_data
507
  else:
508
  # Return 200 with null body if not found, as frontend expects this behavior
509
  # Or, raise HTTPException(status_code=404, detail="Wiki cache not found") if preferred
510
+ # Wiki cache not found
511
  return None
512
 
513
  @app.post("/api/wiki_cache")
 
521
  if not supported_langs.__contains__(request_data.language):
522
  request_data.language = configs["lang_config"]["default"]
523
 
524
+ print(f"Saving wiki cache for {request_data.repo.owner}/{request_data.repo.repo}")
525
  success = await save_wiki_cache(request_data)
526
  if success:
527
  return {"message": "Wiki cache saved successfully"}
 
545
  raise HTTPException(status_code=400, detail="Language is not supported")
546
 
547
  if WIKI_AUTH_MODE:
548
+ # Checking authorization code
549
  if WIKI_AUTH_CODE != authorization_code:
550
  raise HTTPException(status_code=401, detail="Authorization code is invalid")
551
 
552
+ print(f"Deleting wiki cache for {owner}/{repo}")
553
  cache_path = get_wiki_cache_path(owner, repo, repo_type, language)
554
 
555
  if os.path.exists(cache_path):
556
  try:
557
  os.remove(cache_path)
558
+ print(f"Successfully deleted wiki cache: {cache_path}")
559
  return {"message": f"Wiki cache for {owner}/{repo} ({language}) deleted successfully"}
560
  except Exception as e:
561
  logger.error(f"Error deleting wiki cache {cache_path}: {e}")
562
  raise HTTPException(status_code=500, detail=f"Failed to delete wiki cache: {str(e)}")
563
  else:
564
+ print(f"Warning: Wiki cache not found: {cache_path}")
565
  raise HTTPException(status_code=404, detail="Wiki cache not found")
566
 
567
  @app.get("/health")
 
612
 
613
  try:
614
  if not os.path.exists(WIKI_CACHE_DIR):
615
+ print(f"Cache directory {WIKI_CACHE_DIR} not found")
616
  return []
617
 
618
+ # Scanning for project cache files
619
  filenames = await asyncio.to_thread(os.listdir, WIKI_CACHE_DIR) # Use asyncio.to_thread for os.listdir
620
 
621
  for filename in filenames:
 
646
  )
647
  )
648
  else:
649
+ print(f"Warning: Could not parse project details from filename: {filename}")
650
  except Exception as e:
651
  logger.error(f"Error processing file {file_path}: {e}")
652
  continue # Skip this file on error
653
 
654
  # Sort by most recent first
655
  project_entries.sort(key=lambda p: p.submittedAt, reverse=True)
656
+ print(f"Found {len(project_entries)} processed project entries")
657
  return project_entries
658
 
659
  except Exception as e:
api/config.py CHANGED
@@ -97,10 +97,7 @@ def replace_env_placeholders(config: Union[Dict[str, Any], List[Any], str, Any])
97
  original_placeholder = match.group(0)
98
  env_var_value = os.environ.get(env_var_name)
99
  if env_var_value is None:
100
- logger.warning(
101
- f"Environment variable placeholder '{original_placeholder}' was not found in the environment. "
102
- f"The placeholder string will be used as is."
103
- )
104
  return original_placeholder
105
  return env_var_value
106
 
@@ -124,10 +121,10 @@ def load_json_config(filename):
124
  # Otherwise use default directory
125
  config_path = Path(__file__).parent / "config" / filename
126
 
127
- logger.info(f"Loading configuration from {config_path}")
128
 
129
  if not config_path.exists():
130
- logger.warning(f"Configuration file {config_path} does not exist")
131
  return {}
132
 
133
  with open(config_path, 'r', encoding='utf-8') as f:
@@ -135,7 +132,7 @@ def load_json_config(filename):
135
  config = replace_env_placeholders(config)
136
  return config
137
  except Exception as e:
138
- logger.error(f"Error loading configuration file {filename}: {str(e)}")
139
  return {}
140
 
141
  # Load generator model configuration
@@ -161,7 +158,7 @@ def load_generator_config():
161
  }
162
  provider_config["model_client"] = default_map[provider_id]
163
  else:
164
- logger.warning(f"Unknown provider or client class: {provider_id}")
165
 
166
  return generator_config
167
 
@@ -235,7 +232,7 @@ def load_lang_config():
235
  return default_config
236
 
237
  if "supported_languages" not in loaded_config or "default" not in loaded_config:
238
- logger.warning("Language configuration file 'lang.json' is malformed. Using default language configuration.")
239
  return default_config
240
 
241
  return loaded_config
 
97
  original_placeholder = match.group(0)
98
  env_var_value = os.environ.get(env_var_name)
99
  if env_var_value is None:
100
+ # Environment variable placeholder not found, using as is
 
 
 
101
  return original_placeholder
102
  return env_var_value
103
 
 
121
  # Otherwise use default directory
122
  config_path = Path(__file__).parent / "config" / filename
123
 
124
+ # Loading configuration
125
 
126
  if not config_path.exists():
127
+ print(f"Warning: Configuration file {config_path} does not exist")
128
  return {}
129
 
130
  with open(config_path, 'r', encoding='utf-8') as f:
 
132
  config = replace_env_placeholders(config)
133
  return config
134
  except Exception as e:
135
+ print(f"Error loading configuration file {filename}: {str(e)}")
136
  return {}
137
 
138
  # Load generator model configuration
 
158
  }
159
  provider_config["model_client"] = default_map[provider_id]
160
  else:
161
+ print(f"Warning: Unknown provider or client class: {provider_id}")
162
 
163
  return generator_config
164
 
 
232
  return default_config
233
 
234
  if "supported_languages" not in loaded_config or "default" not in loaded_config:
235
+ print("Warning: Language configuration file 'lang.json' is malformed")
236
  return default_config
237
 
238
  return loaded_config
api/data_pipeline.py CHANGED
@@ -51,7 +51,7 @@ def count_tokens(text: str, is_ollama_embedder: bool = None) -> int:
51
  return len(encoding.encode(text))
52
  except Exception as e:
53
  # Fallback to a simple approximation if tiktoken fails
54
- logger.warning(f"Error counting tokens with tiktoken: {e}")
55
  # Rough approximation: 4 characters per token
56
  return len(text) // 4
57
 
@@ -69,7 +69,7 @@ def download_repo(repo_url: str, local_path: str, type: str = "github", access_t
69
  """
70
  try:
71
  # Check if Git is installed
72
- logger.info(f"Preparing to clone repository to {local_path}")
73
  subprocess.run(
74
  ["git", "--version"],
75
  check=True,
@@ -80,7 +80,7 @@ def download_repo(repo_url: str, local_path: str, type: str = "github", access_t
80
  # Check if repository already exists
81
  if os.path.exists(local_path) and os.listdir(local_path):
82
  # Directory exists and is not empty
83
- logger.warning(f"Repository already exists at {local_path}. Using existing repository.")
84
  return f"Using existing repository at {local_path}"
85
 
86
  # Ensure the local path exists
@@ -102,10 +102,10 @@ def download_repo(repo_url: str, local_path: str, type: str = "github", access_t
102
  # Format: https://x-token-auth:{token}@bitbucket.org/owner/repo.git
103
  clone_url = urlunparse((parsed.scheme, f"x-token-auth:{access_token}@{parsed.netloc}", parsed.path, '', '', ''))
104
 
105
- logger.info("Using access token for authentication")
106
 
107
  # Clone the repository
108
- logger.info(f"Cloning repository from {repo_url} to {local_path}")
109
  # We use repo_url in the log to avoid exposing the token in logs
110
  result = subprocess.run(
111
  ["git", "clone", "--depth=1", "--single-branch", clone_url, local_path],
@@ -114,7 +114,7 @@ def download_repo(repo_url: str, local_path: str, type: str = "github", access_t
114
  stderr=subprocess.PIPE,
115
  )
116
 
117
- logger.info("Repository cloned successfully")
118
  return result.stdout.decode("utf-8")
119
 
120
  except subprocess.CalledProcessError as e:
@@ -164,9 +164,7 @@ def read_all_documents(path: str, is_ollama_embedder: bool = None, excluded_dirs
164
  final_included_dirs = set(included_dirs) if included_dirs else set()
165
  final_included_files = set(included_files) if included_files else set()
166
 
167
- logger.info(f"Using inclusion mode")
168
- logger.info(f"Included directories: {list(final_included_dirs)}")
169
- logger.info(f"Included files: {list(final_included_files)}")
170
 
171
  # Convert to lists for processing
172
  included_dirs = list(final_included_dirs)
@@ -199,11 +197,9 @@ def read_all_documents(path: str, is_ollama_embedder: bool = None, excluded_dirs
199
  included_dirs = []
200
  included_files = []
201
 
202
- logger.info(f"Using exclusion mode")
203
- logger.info(f"Excluded directories: {excluded_dirs}")
204
- logger.info(f"Excluded files: {excluded_files}")
205
 
206
- logger.info(f"Reading documents from {path}")
207
 
208
  def should_process_file(file_path: str, use_inclusion: bool, included_dirs: List[str], included_files: List[str],
209
  excluded_dirs: List[str], excluded_files: List[str]) -> bool:
@@ -323,7 +319,7 @@ def read_all_documents(path: str, is_ollama_embedder: bool = None, excluded_dirs
323
  # Check token count
324
  token_count = count_tokens(content, is_ollama_embedder)
325
  if token_count > MAX_EMBEDDING_TOKENS * 10:
326
- logger.warning(f"Skipping large file {relative_path}: Token count ({token_count}) exceeds limit")
327
  continue
328
 
329
  doc = Document(
@@ -339,7 +335,7 @@ def read_all_documents(path: str, is_ollama_embedder: bool = None, excluded_dirs
339
  )
340
  documents.append(doc)
341
  except Exception as e:
342
- logger.error(f"Error reading {file_path}: {e}")
343
 
344
  # Then process documentation files
345
  for ext in doc_extensions:
@@ -357,7 +353,7 @@ def read_all_documents(path: str, is_ollama_embedder: bool = None, excluded_dirs
357
  # Check token count
358
  token_count = count_tokens(content, is_ollama_embedder)
359
  if token_count > MAX_EMBEDDING_TOKENS:
360
- logger.warning(f"Skipping large file {relative_path}: Token count ({token_count}) exceeds limit")
361
  continue
362
 
363
  doc = Document(
@@ -373,9 +369,9 @@ def read_all_documents(path: str, is_ollama_embedder: bool = None, excluded_dirs
373
  )
374
  documents.append(doc)
375
  except Exception as e:
376
- logger.error(f"Error reading {file_path}: {e}")
377
 
378
- logger.info(f"Found {len(documents)} documents")
379
  return documents
380
 
381
  def prepare_data_pipeline(is_ollama_embedder: bool = None):
@@ -486,7 +482,7 @@ def get_github_file_content(repo_url: str, file_path: str, access_token: str = N
486
  headers = {}
487
  if access_token:
488
  headers["Authorization"] = f"token {access_token}"
489
- logger.info(f"Fetching file content from GitHub API: {api_url}")
490
  try:
491
  response = requests.get(api_url, headers=headers)
492
  response.raise_for_status()
@@ -563,12 +559,12 @@ def get_gitlab_file_content(repo_url: str, file_path: str, access_token: str = N
563
  if project_response.status_code == 200:
564
  project_data = project_response.json()
565
  default_branch = project_data.get('default_branch', 'main')
566
- logger.info(f"Found default branch: {default_branch}")
567
  else:
568
- logger.warning(f"Could not fetch project info, using 'main' as default branch")
569
  default_branch = 'main'
570
  except Exception as e:
571
- logger.warning(f"Error fetching project info: {e}, using 'main' as default branch")
572
  default_branch = 'main'
573
 
574
  api_url = f"{gitlab_domain}/api/v4/projects/{encoded_project_path}/repository/files/{encoded_file_path}/raw?ref={default_branch}"
@@ -576,7 +572,7 @@ def get_gitlab_file_content(repo_url: str, file_path: str, access_token: str = N
576
  headers = {}
577
  if access_token:
578
  headers["PRIVATE-TOKEN"] = access_token
579
- logger.info(f"Fetching file content from GitLab API: {api_url}")
580
  try:
581
  response = requests.get(api_url, headers=headers)
582
  response.raise_for_status()
@@ -634,12 +630,12 @@ def get_bitbucket_file_content(repo_url: str, file_path: str, access_token: str
634
  if repo_response.status_code == 200:
635
  repo_data = repo_response.json()
636
  default_branch = repo_data.get('mainbranch', {}).get('name', 'main')
637
- logger.info(f"Found default branch: {default_branch}")
638
  else:
639
- logger.warning(f"Could not fetch repository info, using 'main' as default branch")
640
  default_branch = 'main'
641
  except Exception as e:
642
- logger.warning(f"Error fetching repository info: {e}, using 'main' as default branch")
643
  default_branch = 'main'
644
 
645
  # Use Bitbucket API to get file content
@@ -650,7 +646,7 @@ def get_bitbucket_file_content(repo_url: str, file_path: str, access_token: str
650
  headers = {}
651
  if access_token:
652
  headers["Authorization"] = f"Bearer {access_token}"
653
- logger.info(f"Fetching file content from Bitbucket API: {api_url}")
654
  try:
655
  response = requests.get(api_url, headers=headers)
656
  if response.status_code == 200:
@@ -766,7 +762,7 @@ class DatabaseManager:
766
  repo_url_or_path (str): The URL or local path of the repository
767
  access_token (str, optional): Access token for private repositories
768
  """
769
- logger.info(f"Preparing repo storage for {repo_url_or_path}...")
770
 
771
  try:
772
  root_path = get_adalflow_default_root_path()
@@ -776,7 +772,7 @@ class DatabaseManager:
776
  if repo_url_or_path.startswith("https://") or repo_url_or_path.startswith("http://"):
777
  # Extract the repository name from the URL
778
  repo_name = self._extract_repo_name_from_url(repo_url_or_path, repo_type)
779
- logger.info(f"Extracted repo name: {repo_name}")
780
 
781
  save_repo_dir = os.path.join(root_path, "repos", repo_name)
782
 
@@ -785,7 +781,7 @@ class DatabaseManager:
785
  # Only download if the repository doesn't exist or is empty
786
  download_repo(repo_url_or_path, save_repo_dir, repo_type, access_token)
787
  else:
788
- logger.info(f"Repository already exists at {save_repo_dir}. Using existing repository.")
789
  else: # local path
790
  repo_name = os.path.basename(repo_url_or_path)
791
  save_repo_dir = repo_url_or_path
@@ -799,10 +795,10 @@ class DatabaseManager:
799
  "save_db_file": save_db_file,
800
  }
801
  self.repo_url_or_path = repo_url_or_path
802
- logger.info(f"Repo paths: {self.repo_paths}")
803
 
804
  except Exception as e:
805
- logger.error(f"Failed to create repository structure: {e}")
806
  raise
807
 
808
  def prepare_db_index(self, is_ollama_embedder: bool = None, excluded_dirs: List[str] = None, excluded_files: List[str] = None,
@@ -823,19 +819,19 @@ class DatabaseManager:
823
  """
824
  # check the database
825
  if self.repo_paths and os.path.exists(self.repo_paths["save_db_file"]):
826
- logger.info("Loading existing database...")
827
  try:
828
  self.db = LocalDB.load_state(self.repo_paths["save_db_file"])
829
  documents = self.db.get_transformed_data(key="split_and_embed")
830
  if documents:
831
- logger.info(f"Loaded {len(documents)} documents from existing database")
832
  return documents
833
  except Exception as e:
834
- logger.error(f"Error loading existing database: {e}")
835
  # Continue to create a new database
836
 
837
  # prepare the database
838
- logger.info("Creating new database...")
839
  documents = read_all_documents(
840
  self.repo_paths["save_repo_dir"],
841
  is_ollama_embedder=is_ollama_embedder,
@@ -847,9 +843,9 @@ class DatabaseManager:
847
  self.db = transform_documents_and_save_to_db(
848
  documents, self.repo_paths["save_db_file"], is_ollama_embedder=is_ollama_embedder
849
  )
850
- logger.info(f"Total documents: {len(documents)}")
851
  transformed_docs = self.db.get_transformed_data(key="split_and_embed")
852
- logger.info(f"Total transformed documents: {len(transformed_docs)}")
853
  return transformed_docs
854
 
855
  def prepare_retriever(self, repo_url_or_path: str, type: str = "github", access_token: str = None):
 
51
  return len(encoding.encode(text))
52
  except Exception as e:
53
  # Fallback to a simple approximation if tiktoken fails
54
+ print(f"Warning: Error counting tokens with tiktoken: {e}")
55
  # Rough approximation: 4 characters per token
56
  return len(text) // 4
57
 
 
69
  """
70
  try:
71
  # Check if Git is installed
72
+ print(f"Preparing to clone repository to {local_path}")
73
  subprocess.run(
74
  ["git", "--version"],
75
  check=True,
 
80
  # Check if repository already exists
81
  if os.path.exists(local_path) and os.listdir(local_path):
82
  # Directory exists and is not empty
83
+ print(f"Repository already exists at {local_path}")
84
  return f"Using existing repository at {local_path}"
85
 
86
  # Ensure the local path exists
 
102
  # Format: https://x-token-auth:{token}@bitbucket.org/owner/repo.git
103
  clone_url = urlunparse((parsed.scheme, f"x-token-auth:{access_token}@{parsed.netloc}", parsed.path, '', '', ''))
104
 
105
+ # Using access token for authentication
106
 
107
  # Clone the repository
108
+ print(f"Cloning repository from {repo_url}")
109
  # We use repo_url in the log to avoid exposing the token in logs
110
  result = subprocess.run(
111
  ["git", "clone", "--depth=1", "--single-branch", clone_url, local_path],
 
114
  stderr=subprocess.PIPE,
115
  )
116
 
117
+ print("Repository cloned successfully")
118
  return result.stdout.decode("utf-8")
119
 
120
  except subprocess.CalledProcessError as e:
 
164
  final_included_dirs = set(included_dirs) if included_dirs else set()
165
  final_included_files = set(included_files) if included_files else set()
166
 
167
+ print(f"Using inclusion mode: dirs={list(final_included_dirs)}, files={list(final_included_files)}")
 
 
168
 
169
  # Convert to lists for processing
170
  included_dirs = list(final_included_dirs)
 
197
  included_dirs = []
198
  included_files = []
199
 
200
+ # Using exclusion mode with default filters
 
 
201
 
202
+ print(f"Reading documents from {path}")
203
 
204
  def should_process_file(file_path: str, use_inclusion: bool, included_dirs: List[str], included_files: List[str],
205
  excluded_dirs: List[str], excluded_files: List[str]) -> bool:
 
319
  # Check token count
320
  token_count = count_tokens(content, is_ollama_embedder)
321
  if token_count > MAX_EMBEDDING_TOKENS * 10:
322
+ print(f"Skipping large file {relative_path}: {token_count} tokens")
323
  continue
324
 
325
  doc = Document(
 
335
  )
336
  documents.append(doc)
337
  except Exception as e:
338
+ print(f"Error reading {file_path}: {e}")
339
 
340
  # Then process documentation files
341
  for ext in doc_extensions:
 
353
  # Check token count
354
  token_count = count_tokens(content, is_ollama_embedder)
355
  if token_count > MAX_EMBEDDING_TOKENS:
356
+ print(f"Skipping large file {relative_path}: {token_count} tokens")
357
  continue
358
 
359
  doc = Document(
 
369
  )
370
  documents.append(doc)
371
  except Exception as e:
372
+ print(f"Error reading {file_path}: {e}")
373
 
374
+ print(f"Found {len(documents)} documents")
375
  return documents
376
 
377
  def prepare_data_pipeline(is_ollama_embedder: bool = None):
 
482
  headers = {}
483
  if access_token:
484
  headers["Authorization"] = f"token {access_token}"
485
+ # Fetching file content from GitHub API
486
  try:
487
  response = requests.get(api_url, headers=headers)
488
  response.raise_for_status()
 
559
  if project_response.status_code == 200:
560
  project_data = project_response.json()
561
  default_branch = project_data.get('default_branch', 'main')
562
+ # Found default branch
563
  else:
564
+ print("Warning: Could not fetch project info, using 'main' as default branch")
565
  default_branch = 'main'
566
  except Exception as e:
567
+ print(f"Warning: Error fetching project info: {e}, using 'main' as default branch")
568
  default_branch = 'main'
569
 
570
  api_url = f"{gitlab_domain}/api/v4/projects/{encoded_project_path}/repository/files/{encoded_file_path}/raw?ref={default_branch}"
 
572
  headers = {}
573
  if access_token:
574
  headers["PRIVATE-TOKEN"] = access_token
575
+ # Fetching file content from GitLab API
576
  try:
577
  response = requests.get(api_url, headers=headers)
578
  response.raise_for_status()
 
630
  if repo_response.status_code == 200:
631
  repo_data = repo_response.json()
632
  default_branch = repo_data.get('mainbranch', {}).get('name', 'main')
633
+ # Found default branch
634
  else:
635
+ print("Warning: Could not fetch repository info, using 'main' as default branch")
636
  default_branch = 'main'
637
  except Exception as e:
638
+ print(f"Warning: Error fetching repository info: {e}, using 'main' as default branch")
639
  default_branch = 'main'
640
 
641
  # Use Bitbucket API to get file content
 
646
  headers = {}
647
  if access_token:
648
  headers["Authorization"] = f"Bearer {access_token}"
649
+ # Fetching file content from Bitbucket API
650
  try:
651
  response = requests.get(api_url, headers=headers)
652
  if response.status_code == 200:
 
762
  repo_url_or_path (str): The URL or local path of the repository
763
  access_token (str, optional): Access token for private repositories
764
  """
765
+ print(f"Preparing repo storage for {repo_url_or_path}")
766
 
767
  try:
768
  root_path = get_adalflow_default_root_path()
 
772
  if repo_url_or_path.startswith("https://") or repo_url_or_path.startswith("http://"):
773
  # Extract the repository name from the URL
774
  repo_name = self._extract_repo_name_from_url(repo_url_or_path, repo_type)
775
+ # Extracted repo name
776
 
777
  save_repo_dir = os.path.join(root_path, "repos", repo_name)
778
 
 
781
  # Only download if the repository doesn't exist or is empty
782
  download_repo(repo_url_or_path, save_repo_dir, repo_type, access_token)
783
  else:
784
+ print(f"Repository already exists at {save_repo_dir}")
785
  else: # local path
786
  repo_name = os.path.basename(repo_url_or_path)
787
  save_repo_dir = repo_url_or_path
 
795
  "save_db_file": save_db_file,
796
  }
797
  self.repo_url_or_path = repo_url_or_path
798
+ # Repository paths configured
799
 
800
  except Exception as e:
801
+ print(f"Error: Failed to create repository structure: {e}")
802
  raise
803
 
804
  def prepare_db_index(self, is_ollama_embedder: bool = None, excluded_dirs: List[str] = None, excluded_files: List[str] = None,
 
819
  """
820
  # check the database
821
  if self.repo_paths and os.path.exists(self.repo_paths["save_db_file"]):
822
+ print("Loading existing database...")
823
  try:
824
  self.db = LocalDB.load_state(self.repo_paths["save_db_file"])
825
  documents = self.db.get_transformed_data(key="split_and_embed")
826
  if documents:
827
+ print(f"Loaded {len(documents)} documents from existing database")
828
  return documents
829
  except Exception as e:
830
+ print(f"Error loading existing database: {e}")
831
  # Continue to create a new database
832
 
833
  # prepare the database
834
+ print("Creating new database...")
835
  documents = read_all_documents(
836
  self.repo_paths["save_repo_dir"],
837
  is_ollama_embedder=is_ollama_embedder,
 
843
  self.db = transform_documents_and_save_to_db(
844
  documents, self.repo_paths["save_db_file"], is_ollama_embedder=is_ollama_embedder
845
  )
846
+ print(f"Total documents: {len(documents)}")
847
  transformed_docs = self.db.get_transformed_data(key="split_and_embed")
848
+ print(f"Total transformed documents: {len(transformed_docs)}")
849
  return transformed_docs
850
 
851
  def prepare_retriever(self, repo_url_or_path: str, type: str = "github", access_token: str = None):
api/logging_config.py CHANGED
@@ -46,9 +46,9 @@ def setup_logging(format: str = None):
46
  disable_file_logging = True
47
  default_log_file = None
48
 
49
- # Get log level from environment
50
- log_level_str = os.environ.get("LOG_LEVEL", "INFO").upper()
51
- log_level = getattr(logging, log_level_str, logging.INFO)
52
 
53
  # Get log file path (only if file logging is enabled)
54
  resolved_path = None
 
46
  disable_file_logging = True
47
  default_log_file = None
48
 
49
+ # Get log level from environment - default to WARNING to reduce noise in production
50
+ log_level_str = os.environ.get("LOG_LEVEL", "WARNING").upper()
51
+ log_level = getattr(logging, log_level_str, logging.WARNING)
52
 
53
  # Get log file path (only if file logging is enabled)
54
  resolved_path = None
api/main.py CHANGED
@@ -94,8 +94,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
94
  required_env_vars = ['GOOGLE_API_KEY', 'OPENAI_API_KEY']
95
  missing_vars = [var for var in required_env_vars if not os.environ.get(var)]
96
  if missing_vars:
97
- logger.warning(f"Missing environment variables: {', '.join(missing_vars)}")
98
- logger.warning("Some functionality may not work correctly without these variables.")
99
 
100
  # Configure Google Generative AI
101
  import google.generativeai as genai
@@ -104,7 +103,7 @@ from api.config import GOOGLE_API_KEY
104
  if GOOGLE_API_KEY:
105
  genai.configure(api_key=GOOGLE_API_KEY)
106
  else:
107
- logger.warning("GOOGLE_API_KEY not configured")
108
 
109
  if __name__ == "__main__":
110
  # Get port from environment variable or use default
@@ -113,7 +112,7 @@ if __name__ == "__main__":
113
  # Import the app here to ensure environment variables are set first
114
  from api.api import app
115
 
116
- logger.info(f"Starting Streaming API on port {port}")
117
 
118
  # Run the FastAPI app with uvicorn
119
  # Disable reload in production/Docker environment
 
94
  required_env_vars = ['GOOGLE_API_KEY', 'OPENAI_API_KEY']
95
  missing_vars = [var for var in required_env_vars if not os.environ.get(var)]
96
  if missing_vars:
97
+ print(f"Warning: Missing environment variables: {', '.join(missing_vars)}")
 
98
 
99
  # Configure Google Generative AI
100
  import google.generativeai as genai
 
103
  if GOOGLE_API_KEY:
104
  genai.configure(api_key=GOOGLE_API_KEY)
105
  else:
106
+ print("Warning: GOOGLE_API_KEY not configured")
107
 
108
  if __name__ == "__main__":
109
  # Get port from environment variable or use default
 
112
  # Import the app here to ensure environment variables are set first
113
  from api.api import app
114
 
115
+ print(f"Starting Streaming API on port {port}")
116
 
117
  # Run the FastAPI app with uvicorn
118
  # Disable reload in production/Docker environment
api/simple_chat.py CHANGED
@@ -82,9 +82,9 @@ async def chat_completions_stream(request: ChatCompletionRequest):
82
  last_message = request.messages[-1]
83
  if hasattr(last_message, 'content') and last_message.content:
84
  tokens = count_tokens(last_message.content, request.provider == "ollama")
85
- logger.info(f"Request size: {tokens} tokens")
86
  if tokens > 8000:
87
- logger.warning(f"Request exceeds recommended token limit ({tokens} > 7500)")
88
  input_too_large = True
89
 
90
  # Create a new RAG instance for this request
@@ -99,28 +99,28 @@ async def chat_completions_stream(request: ChatCompletionRequest):
99
 
100
  if request.excluded_dirs:
101
  excluded_dirs = [unquote(dir_path) for dir_path in request.excluded_dirs.split('\n') if dir_path.strip()]
102
- logger.info(f"Using custom excluded directories: {excluded_dirs}")
103
  if request.excluded_files:
104
  excluded_files = [unquote(file_pattern) for file_pattern in request.excluded_files.split('\n') if file_pattern.strip()]
105
- logger.info(f"Using custom excluded files: {excluded_files}")
106
  if request.included_dirs:
107
  included_dirs = [unquote(dir_path) for dir_path in request.included_dirs.split('\n') if dir_path.strip()]
108
- logger.info(f"Using custom included directories: {included_dirs}")
109
  if request.included_files:
110
  included_files = [unquote(file_pattern) for file_pattern in request.included_files.split('\n') if file_pattern.strip()]
111
- logger.info(f"Using custom included files: {included_files}")
112
 
113
  request_rag.prepare_retriever(request.repo_url, request.type, request.token, excluded_dirs, excluded_files, included_dirs, included_files)
114
- logger.info(f"Retriever prepared for {request.repo_url}")
115
  except ValueError as e:
116
  if "No valid documents with embeddings found" in str(e):
117
- logger.error(f"No valid embeddings found: {str(e)}")
118
  raise HTTPException(status_code=500, detail="No valid document embeddings found. This may be due to embedding size inconsistencies or API errors during document processing. Please try again or check your repository content.")
119
  else:
120
- logger.error(f"ValueError preparing retriever: {str(e)}")
121
  raise HTTPException(status_code=500, detail=f"Error preparing retriever: {str(e)}")
122
  except Exception as e:
123
- logger.error(f"Error preparing retriever: {str(e)}")
124
  # Check for specific embedding-related errors
125
  if "All embeddings should be of the same size" in str(e):
126
  raise HTTPException(status_code=500, detail="Inconsistent embedding sizes detected. Some documents may have failed to embed properly. Please try again.")
@@ -163,7 +163,7 @@ async def chat_completions_stream(request: ChatCompletionRequest):
163
  # Count research iterations if this is a Deep Research request
164
  if is_deep_research:
165
  research_iteration = sum(1 for msg in request.messages if msg.role == 'assistant') + 1
166
- logger.info(f"Deep Research request detected - iteration {research_iteration}")
167
 
168
  # Check if this is a continuation request
169
  if "continue" in last_message.content.lower() and "research" in last_message.content.lower():
@@ -172,13 +172,13 @@ async def chat_completions_stream(request: ChatCompletionRequest):
172
  for msg in request.messages:
173
  if msg.role == "user" and "continue" not in msg.content.lower():
174
  original_topic = msg.content.replace("[DEEP RESEARCH]", "").strip()
175
- logger.info(f"Found original research topic: {original_topic}")
176
  break
177
 
178
  if original_topic:
179
  # Replace the continuation message with the original topic
180
  last_message.content = original_topic
181
- logger.info(f"Using original topic for research: {original_topic}")
182
 
183
  # Get the query from the last message
184
  query = last_message.content
@@ -194,7 +194,7 @@ async def chat_completions_stream(request: ChatCompletionRequest):
194
  if request.filePath:
195
  # Use the file path to get relevant context about the file
196
  rag_query = f"Contexts related to {request.filePath}"
197
- logger.info(f"Modified RAG query to focus on file: {request.filePath}")
198
 
199
  # Try to perform RAG retrieval
200
  try:
@@ -204,7 +204,7 @@ async def chat_completions_stream(request: ChatCompletionRequest):
204
  if retrieved_documents and retrieved_documents[0].documents:
205
  # Format context for the prompt in a more structured way
206
  documents = retrieved_documents[0].documents
207
- logger.info(f"Retrieved {len(documents)} documents")
208
 
209
  # Group documents by file path
210
  docs_by_file = {}
@@ -227,13 +227,13 @@ async def chat_completions_stream(request: ChatCompletionRequest):
227
  # Join all parts with clear separation
228
  context_text = "\n\n" + "-" * 10 + "\n\n".join(context_parts)
229
  else:
230
- logger.warning("No documents retrieved from RAG")
231
  except Exception as e:
232
- logger.error(f"Error in RAG retrieval: {str(e)}")
233
  # Continue without RAG if there's an error
234
 
235
  except Exception as e:
236
- logger.error(f"Error retrieving documents: {str(e)}")
237
  context_text = ""
238
 
239
  # Get repository information
@@ -292,9 +292,9 @@ async def chat_completions_stream(request: ChatCompletionRequest):
292
  if request.filePath:
293
  try:
294
  file_content = get_file_content(request.repo_url, request.filePath, request.type, request.token)
295
- logger.info(f"Successfully retrieved content for file: {request.filePath}")
296
  except Exception as e:
297
- logger.error(f"Error retrieving file content: {str(e)}")
298
  # Continue without file content if there's an error
299
 
300
  # Format conversation history
@@ -321,7 +321,7 @@ async def chat_completions_stream(request: ChatCompletionRequest):
321
  prompt += f"{CONTEXT_START}\n{context_text}\n{CONTEXT_END}\n\n"
322
  else:
323
  # Add a note that we're skipping RAG due to size constraints or because it's the isolated API
324
- logger.info("No context available from RAG")
325
  prompt += "<note>Answering without retrieval augmentation.</note>\n\n"
326
 
327
  prompt += f"<query>\n{query}\n</query>\n\nAssistant: "
@@ -348,11 +348,11 @@ async def chat_completions_stream(request: ChatCompletionRequest):
348
  model_type=ModelType.LLM
349
  )
350
  elif request.provider == "openrouter":
351
- logger.info(f"Using OpenRouter with model: {request.model}")
352
 
353
  # Check if OpenRouter API key is set
354
  if not OPENROUTER_API_KEY:
355
- logger.warning("OPENROUTER_API_KEY not configured, but continuing with request")
356
  # We'll let the OpenRouterClient handle this and return a friendly error message
357
 
358
  model = OpenRouterClient()
@@ -371,11 +371,11 @@ async def chat_completions_stream(request: ChatCompletionRequest):
371
  model_type=ModelType.LLM
372
  )
373
  elif request.provider == "openai":
374
- logger.info(f"Using Openai protocol with model: {request.model}")
375
 
376
  # Check if an API key is set for Openai
377
  if not OPENAI_API_KEY:
378
- logger.warning("OPENAI_API_KEY not configured, but continuing with request")
379
  # We'll let the OpenAIClient handle this and return an error message
380
 
381
  # Initialize Openai client
@@ -395,11 +395,11 @@ async def chat_completions_stream(request: ChatCompletionRequest):
395
  model_type=ModelType.LLM
396
  )
397
  elif request.provider == "bedrock":
398
- logger.info(f"Using AWS Bedrock with model: {request.model}")
399
 
400
  # Check if AWS credentials are set
401
  if not AWS_ACCESS_KEY_ID or not AWS_SECRET_ACCESS_KEY:
402
- logger.warning("AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY not configured, but continuing with request")
403
  # We'll let the BedrockClient handle this and return an error message
404
 
405
  # Initialize Bedrock client
@@ -416,7 +416,7 @@ async def chat_completions_stream(request: ChatCompletionRequest):
416
  model_type=ModelType.LLM
417
  )
418
  elif request.provider == "azure":
419
- logger.info(f"Using Azure AI with model: {request.model}")
420
 
421
  # Initialize Azure AI client
422
  model = AzureAIClient()
@@ -458,18 +458,18 @@ async def chat_completions_stream(request: ChatCompletionRequest):
458
  elif request.provider == "openrouter":
459
  try:
460
  # Get the response and handle it properly using the previously created api_kwargs
461
- logger.info("Making OpenRouter API call")
462
  response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
463
  # Handle streaming response from OpenRouter
464
  async for chunk in response:
465
  yield chunk
466
  except Exception as e_openrouter:
467
- logger.error(f"Error with OpenRouter API: {str(e_openrouter)}")
468
  yield f"\nError with OpenRouter API: {str(e_openrouter)}\n\nPlease check that you have set the OPENROUTER_API_KEY environment variable with a valid API key."
469
  elif request.provider == "openai":
470
  try:
471
  # Get the response and handle it properly using the previously created api_kwargs
472
- logger.info("Making Openai API call")
473
  response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
474
  # Handle streaming response from Openai
475
  async for chunk in response:
@@ -481,12 +481,12 @@ async def chat_completions_stream(request: ChatCompletionRequest):
481
  if text is not None:
482
  yield text
483
  except Exception as e_openai:
484
- logger.error(f"Error with Openai API: {str(e_openai)}")
485
  yield f"\nError with Openai API: {str(e_openai)}\n\nPlease check that you have set the OPENAI_API_KEY environment variable with a valid API key."
486
  elif request.provider == "bedrock":
487
  try:
488
  # Get the response and handle it properly using the previously created api_kwargs
489
- logger.info("Making AWS Bedrock API call")
490
  response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
491
  # Handle response from Bedrock (not streaming yet)
492
  if isinstance(response, str):
@@ -495,12 +495,12 @@ async def chat_completions_stream(request: ChatCompletionRequest):
495
  # Try to extract text from the response
496
  yield str(response)
497
  except Exception as e_bedrock:
498
- logger.error(f"Error with AWS Bedrock API: {str(e_bedrock)}")
499
  yield f"\nError with AWS Bedrock API: {str(e_bedrock)}\n\nPlease check that you have set the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables with valid credentials."
500
  elif request.provider == "azure":
501
  try:
502
  # Get the response and handle it properly using the previously created api_kwargs
503
- logger.info("Making Azure AI API call")
504
  response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
505
  # Handle streaming response from Azure AI
506
  async for chunk in response:
@@ -512,7 +512,7 @@ async def chat_completions_stream(request: ChatCompletionRequest):
512
  if text is not None:
513
  yield text
514
  except Exception as e_azure:
515
- logger.error(f"Error with Azure AI API: {str(e_azure)}")
516
  yield f"\nError with Azure AI API: {str(e_azure)}\n\nPlease check that you have set the AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, and AZURE_OPENAI_VERSION environment variables with valid values."
517
  else:
518
  # Generate streaming response
@@ -523,13 +523,13 @@ async def chat_completions_stream(request: ChatCompletionRequest):
523
  yield chunk.text
524
 
525
  except Exception as e_outer:
526
- logger.error(f"Error in streaming response: {str(e_outer)}")
527
  error_message = str(e_outer)
528
 
529
  # Check for token limit errors
530
  if "maximum context length" in error_message or "token limit" in error_message or "too many tokens" in error_message:
531
  # If we hit a token limit error, try again without context
532
- logger.warning("Token limit exceeded, retrying without context")
533
  try:
534
  # Create a simplified prompt without context
535
  simplified_prompt = f"/no_think {system_prompt}\n\n"
@@ -572,14 +572,14 @@ async def chat_completions_stream(request: ChatCompletionRequest):
572
  )
573
 
574
  # Get the response using the simplified prompt
575
- logger.info("Making fallback OpenRouter API call")
576
  fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)
577
 
578
  # Handle streaming fallback_response from OpenRouter
579
  async for chunk in fallback_response:
580
  yield chunk
581
  except Exception as e_fallback:
582
- logger.error(f"Error with OpenRouter API fallback: {str(e_fallback)}")
583
  yield f"\nError with OpenRouter API fallback: {str(e_fallback)}\n\nPlease check that you have set the OPENROUTER_API_KEY environment variable with a valid API key."
584
  elif request.provider == "openai":
585
  try:
@@ -591,7 +591,7 @@ async def chat_completions_stream(request: ChatCompletionRequest):
591
  )
592
 
593
  # Get the response using the simplified prompt
594
- logger.info("Making fallback Openai API call")
595
  fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)
596
 
597
  # Handle streaming fallback_response from Openai
@@ -599,7 +599,7 @@ async def chat_completions_stream(request: ChatCompletionRequest):
599
  text = chunk if isinstance(chunk, str) else getattr(chunk, 'text', str(chunk))
600
  yield text
601
  except Exception as e_fallback:
602
- logger.error(f"Error with Openai API fallback: {str(e_fallback)}")
603
  yield f"\nError with Openai API fallback: {str(e_fallback)}\n\nPlease check that you have set the OPENAI_API_KEY environment variable with a valid API key."
604
  elif request.provider == "bedrock":
605
  try:
@@ -611,7 +611,7 @@ async def chat_completions_stream(request: ChatCompletionRequest):
611
  )
612
 
613
  # Get the response using the simplified prompt
614
- logger.info("Making fallback AWS Bedrock API call")
615
  fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)
616
 
617
  # Handle response from Bedrock
@@ -621,7 +621,7 @@ async def chat_completions_stream(request: ChatCompletionRequest):
621
  # Try to extract text from the response
622
  yield str(fallback_response)
623
  except Exception as e_fallback:
624
- logger.error(f"Error with AWS Bedrock API fallback: {str(e_fallback)}")
625
  yield f"\nError with AWS Bedrock API fallback: {str(e_fallback)}\n\nPlease check that you have set the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables with valid credentials."
626
  elif request.provider == "azure":
627
  try:
@@ -633,7 +633,7 @@ async def chat_completions_stream(request: ChatCompletionRequest):
633
  )
634
 
635
  # Get the response using the simplified prompt
636
- logger.info("Making fallback Azure AI API call")
637
  fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)
638
 
639
  # Handle streaming fallback response from Azure AI
@@ -646,7 +646,7 @@ async def chat_completions_stream(request: ChatCompletionRequest):
646
  if text is not None:
647
  yield text
648
  except Exception as e_fallback:
649
- logger.error(f"Error with Azure AI API fallback: {str(e_fallback)}")
650
  yield f"\nError with Azure AI API fallback: {str(e_fallback)}\n\nPlease check that you have set the AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, and AZURE_OPENAI_VERSION environment variables with valid values."
651
  else:
652
  # Initialize Google Generative AI model
@@ -667,7 +667,7 @@ async def chat_completions_stream(request: ChatCompletionRequest):
667
  if hasattr(chunk, 'text'):
668
  yield chunk.text
669
  except Exception as e2:
670
- logger.error(f"Error in fallback streaming response: {str(e2)}")
671
  yield f"\nI apologize, but your request is too large for me to process. Please try a shorter query or break it into smaller parts."
672
  else:
673
  # For other errors, return the error message
@@ -680,7 +680,7 @@ async def chat_completions_stream(request: ChatCompletionRequest):
680
  raise
681
  except Exception as e_handler:
682
  error_msg = f"Error in streaming chat completion: {str(e_handler)}"
683
- logger.error(error_msg)
684
  raise HTTPException(status_code=500, detail=error_msg)
685
 
686
  @app.get("/")
 
82
  last_message = request.messages[-1]
83
  if hasattr(last_message, 'content') and last_message.content:
84
  tokens = count_tokens(last_message.content, request.provider == "ollama")
85
+ # Request size check
86
  if tokens > 8000:
87
+ print(f"Warning: Request exceeds recommended token limit ({tokens} > 7500)")
88
  input_too_large = True
89
 
90
  # Create a new RAG instance for this request
 
99
 
100
  if request.excluded_dirs:
101
  excluded_dirs = [unquote(dir_path) for dir_path in request.excluded_dirs.split('\n') if dir_path.strip()]
102
+ # Using custom excluded directories
103
  if request.excluded_files:
104
  excluded_files = [unquote(file_pattern) for file_pattern in request.excluded_files.split('\n') if file_pattern.strip()]
105
+ # Using custom excluded files
106
  if request.included_dirs:
107
  included_dirs = [unquote(dir_path) for dir_path in request.included_dirs.split('\n') if dir_path.strip()]
108
+ # Using custom included directories
109
  if request.included_files:
110
  included_files = [unquote(file_pattern) for file_pattern in request.included_files.split('\n') if file_pattern.strip()]
111
+ # Using custom included files
112
 
113
  request_rag.prepare_retriever(request.repo_url, request.type, request.token, excluded_dirs, excluded_files, included_dirs, included_files)
114
+ print(f"Retriever prepared for {request.repo_url}")
115
  except ValueError as e:
116
  if "No valid documents with embeddings found" in str(e):
117
+ print(f"Error: No valid embeddings found: {str(e)}")
118
  raise HTTPException(status_code=500, detail="No valid document embeddings found. This may be due to embedding size inconsistencies or API errors during document processing. Please try again or check your repository content.")
119
  else:
120
+ print(f"Error: ValueError preparing retriever: {str(e)}")
121
  raise HTTPException(status_code=500, detail=f"Error preparing retriever: {str(e)}")
122
  except Exception as e:
123
+ print(f"Error preparing retriever: {str(e)}")
124
  # Check for specific embedding-related errors
125
  if "All embeddings should be of the same size" in str(e):
126
  raise HTTPException(status_code=500, detail="Inconsistent embedding sizes detected. Some documents may have failed to embed properly. Please try again.")
 
163
  # Count research iterations if this is a Deep Research request
164
  if is_deep_research:
165
  research_iteration = sum(1 for msg in request.messages if msg.role == 'assistant') + 1
166
+ print(f"Deep Research request detected - iteration {research_iteration}")
167
 
168
  # Check if this is a continuation request
169
  if "continue" in last_message.content.lower() and "research" in last_message.content.lower():
 
172
  for msg in request.messages:
173
  if msg.role == "user" and "continue" not in msg.content.lower():
174
  original_topic = msg.content.replace("[DEEP RESEARCH]", "").strip()
175
+ # Found original research topic
176
  break
177
 
178
  if original_topic:
179
  # Replace the continuation message with the original topic
180
  last_message.content = original_topic
181
+ # Using original topic for research
182
 
183
  # Get the query from the last message
184
  query = last_message.content
 
194
  if request.filePath:
195
  # Use the file path to get relevant context about the file
196
  rag_query = f"Contexts related to {request.filePath}"
197
+ # Modified RAG query to focus on file
198
 
199
  # Try to perform RAG retrieval
200
  try:
 
204
  if retrieved_documents and retrieved_documents[0].documents:
205
  # Format context for the prompt in a more structured way
206
  documents = retrieved_documents[0].documents
207
+ # Retrieved documents
208
 
209
  # Group documents by file path
210
  docs_by_file = {}
 
227
  # Join all parts with clear separation
228
  context_text = "\n\n" + "-" * 10 + "\n\n".join(context_parts)
229
  else:
230
+ print("Warning: No documents retrieved from RAG")
231
  except Exception as e:
232
+ print(f"Error in RAG retrieval: {str(e)}")
233
  # Continue without RAG if there's an error
234
 
235
  except Exception as e:
236
+ print(f"Error retrieving documents: {str(e)}")
237
  context_text = ""
238
 
239
  # Get repository information
 
292
  if request.filePath:
293
  try:
294
  file_content = get_file_content(request.repo_url, request.filePath, request.type, request.token)
295
+ # Successfully retrieved content for file
296
  except Exception as e:
297
+ print(f"Error retrieving file content: {str(e)}")
298
  # Continue without file content if there's an error
299
 
300
  # Format conversation history
 
321
  prompt += f"{CONTEXT_START}\n{context_text}\n{CONTEXT_END}\n\n"
322
  else:
323
  # Add a note that we're skipping RAG due to size constraints or because it's the isolated API
324
+ # No context available from RAG
325
  prompt += "<note>Answering without retrieval augmentation.</note>\n\n"
326
 
327
  prompt += f"<query>\n{query}\n</query>\n\nAssistant: "
 
348
  model_type=ModelType.LLM
349
  )
350
  elif request.provider == "openrouter":
351
+ # Using OpenRouter
352
 
353
  # Check if OpenRouter API key is set
354
  if not OPENROUTER_API_KEY:
355
+ print("Warning: OPENROUTER_API_KEY not configured")
356
  # We'll let the OpenRouterClient handle this and return a friendly error message
357
 
358
  model = OpenRouterClient()
 
371
  model_type=ModelType.LLM
372
  )
373
  elif request.provider == "openai":
374
+ # Using OpenAI
375
 
376
  # Check if an API key is set for Openai
377
  if not OPENAI_API_KEY:
378
+ print("Warning: OPENAI_API_KEY not configured")
379
  # We'll let the OpenAIClient handle this and return an error message
380
 
381
  # Initialize Openai client
 
395
  model_type=ModelType.LLM
396
  )
397
  elif request.provider == "bedrock":
398
+ # Using AWS Bedrock
399
 
400
  # Check if AWS credentials are set
401
  if not AWS_ACCESS_KEY_ID or not AWS_SECRET_ACCESS_KEY:
402
+ print("Warning: AWS credentials not configured")
403
  # We'll let the BedrockClient handle this and return an error message
404
 
405
  # Initialize Bedrock client
 
416
  model_type=ModelType.LLM
417
  )
418
  elif request.provider == "azure":
419
+ # Using Azure AI
420
 
421
  # Initialize Azure AI client
422
  model = AzureAIClient()
 
458
  elif request.provider == "openrouter":
459
  try:
460
  # Get the response and handle it properly using the previously created api_kwargs
461
+ # Making OpenRouter API call
462
  response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
463
  # Handle streaming response from OpenRouter
464
  async for chunk in response:
465
  yield chunk
466
  except Exception as e_openrouter:
467
+ print(f"Error with OpenRouter API: {str(e_openrouter)}")
468
  yield f"\nError with OpenRouter API: {str(e_openrouter)}\n\nPlease check that you have set the OPENROUTER_API_KEY environment variable with a valid API key."
469
  elif request.provider == "openai":
470
  try:
471
  # Get the response and handle it properly using the previously created api_kwargs
472
+ # Making OpenAI API call
473
  response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
474
  # Handle streaming response from Openai
475
  async for chunk in response:
 
481
  if text is not None:
482
  yield text
483
  except Exception as e_openai:
484
+ print(f"Error with OpenAI API: {str(e_openai)}")
485
  yield f"\nError with Openai API: {str(e_openai)}\n\nPlease check that you have set the OPENAI_API_KEY environment variable with a valid API key."
486
  elif request.provider == "bedrock":
487
  try:
488
  # Get the response and handle it properly using the previously created api_kwargs
489
+ # Making AWS Bedrock API call
490
  response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
491
  # Handle response from Bedrock (not streaming yet)
492
  if isinstance(response, str):
 
495
  # Try to extract text from the response
496
  yield str(response)
497
  except Exception as e_bedrock:
498
+ print(f"Error with AWS Bedrock API: {str(e_bedrock)}")
499
  yield f"\nError with AWS Bedrock API: {str(e_bedrock)}\n\nPlease check that you have set the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables with valid credentials."
500
  elif request.provider == "azure":
501
  try:
502
  # Get the response and handle it properly using the previously created api_kwargs
503
+ # Making Azure AI API call
504
  response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
505
  # Handle streaming response from Azure AI
506
  async for chunk in response:
 
512
  if text is not None:
513
  yield text
514
  except Exception as e_azure:
515
+ print(f"Error with Azure AI API: {str(e_azure)}")
516
  yield f"\nError with Azure AI API: {str(e_azure)}\n\nPlease check that you have set the AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, and AZURE_OPENAI_VERSION environment variables with valid values."
517
  else:
518
  # Generate streaming response
 
523
  yield chunk.text
524
 
525
  except Exception as e_outer:
526
+ print(f"Error in streaming response: {str(e_outer)}")
527
  error_message = str(e_outer)
528
 
529
  # Check for token limit errors
530
  if "maximum context length" in error_message or "token limit" in error_message or "too many tokens" in error_message:
531
  # If we hit a token limit error, try again without context
532
+ print("Warning: Token limit exceeded, retrying without context")
533
  try:
534
  # Create a simplified prompt without context
535
  simplified_prompt = f"/no_think {system_prompt}\n\n"
 
572
  )
573
 
574
  # Get the response using the simplified prompt
575
+ # Making fallback OpenRouter API call
576
  fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)
577
 
578
  # Handle streaming fallback_response from OpenRouter
579
  async for chunk in fallback_response:
580
  yield chunk
581
  except Exception as e_fallback:
582
+ print(f"Error with OpenRouter API fallback: {str(e_fallback)}")
583
  yield f"\nError with OpenRouter API fallback: {str(e_fallback)}\n\nPlease check that you have set the OPENROUTER_API_KEY environment variable with a valid API key."
584
  elif request.provider == "openai":
585
  try:
 
591
  )
592
 
593
  # Get the response using the simplified prompt
594
+ # Making fallback OpenAI API call
595
  fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)
596
 
597
  # Handle streaming fallback_response from Openai
 
599
  text = chunk if isinstance(chunk, str) else getattr(chunk, 'text', str(chunk))
600
  yield text
601
  except Exception as e_fallback:
602
+ print(f"Error with OpenAI API fallback: {str(e_fallback)}")
603
  yield f"\nError with Openai API fallback: {str(e_fallback)}\n\nPlease check that you have set the OPENAI_API_KEY environment variable with a valid API key."
604
  elif request.provider == "bedrock":
605
  try:
 
611
  )
612
 
613
  # Get the response using the simplified prompt
614
+ # Making fallback AWS Bedrock API call
615
  fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)
616
 
617
  # Handle response from Bedrock
 
621
  # Try to extract text from the response
622
  yield str(fallback_response)
623
  except Exception as e_fallback:
624
+ print(f"Error with AWS Bedrock API fallback: {str(e_fallback)}")
625
  yield f"\nError with AWS Bedrock API fallback: {str(e_fallback)}\n\nPlease check that you have set the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables with valid credentials."
626
  elif request.provider == "azure":
627
  try:
 
633
  )
634
 
635
  # Get the response using the simplified prompt
636
+ # Making fallback Azure AI API call
637
  fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)
638
 
639
  # Handle streaming fallback response from Azure AI
 
646
  if text is not None:
647
  yield text
648
  except Exception as e_fallback:
649
+ print(f"Error with Azure AI API fallback: {str(e_fallback)}")
650
  yield f"\nError with Azure AI API fallback: {str(e_fallback)}\n\nPlease check that you have set the AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, and AZURE_OPENAI_VERSION environment variables with valid values."
651
  else:
652
  # Initialize Google Generative AI model
 
667
  if hasattr(chunk, 'text'):
668
  yield chunk.text
669
  except Exception as e2:
670
+ print(f"Error in fallback streaming response: {str(e2)}")
671
  yield f"\nI apologize, but your request is too large for me to process. Please try a shorter query or break it into smaller parts."
672
  else:
673
  # For other errors, return the error message
 
680
  raise
681
  except Exception as e_handler:
682
  error_msg = f"Error in streaming chat completion: {str(e_handler)}"
683
+ print(f"Error: {error_msg}")
684
  raise HTTPException(status_code=500, detail=error_msg)
685
 
686
  @app.get("/")
api/websocket_wiki.py CHANGED
@@ -67,9 +67,9 @@ async def handle_websocket_chat(websocket: WebSocket):
67
  last_message = request.messages[-1]
68
  if hasattr(last_message, 'content') and last_message.content:
69
  tokens = count_tokens(last_message.content, request.provider == "ollama")
70
- logger.info(f"Request size: {tokens} tokens")
71
  if tokens > 8000:
72
- logger.warning(f"Request exceeds recommended token limit ({tokens} > 7500)")
73
  input_too_large = True
74
 
75
  # Create a new RAG instance for this request
@@ -84,32 +84,32 @@ async def handle_websocket_chat(websocket: WebSocket):
84
 
85
  if request.excluded_dirs:
86
  excluded_dirs = [unquote(dir_path) for dir_path in request.excluded_dirs.split('\n') if dir_path.strip()]
87
- logger.info(f"Using custom excluded directories: {excluded_dirs}")
88
  if request.excluded_files:
89
  excluded_files = [unquote(file_pattern) for file_pattern in request.excluded_files.split('\n') if file_pattern.strip()]
90
- logger.info(f"Using custom excluded files: {excluded_files}")
91
  if request.included_dirs:
92
  included_dirs = [unquote(dir_path) for dir_path in request.included_dirs.split('\n') if dir_path.strip()]
93
- logger.info(f"Using custom included directories: {included_dirs}")
94
  if request.included_files:
95
  included_files = [unquote(file_pattern) for file_pattern in request.included_files.split('\n') if file_pattern.strip()]
96
- logger.info(f"Using custom included files: {included_files}")
97
 
98
  request_rag.prepare_retriever(request.repo_url, request.type, request.token, excluded_dirs, excluded_files, included_dirs, included_files)
99
- logger.info(f"Retriever prepared for {request.repo_url}")
100
  except ValueError as e:
101
  if "No valid documents with embeddings found" in str(e):
102
- logger.error(f"No valid embeddings found: {str(e)}")
103
  await websocket.send_text("Error: No valid document embeddings found. This may be due to embedding size inconsistencies or API errors during document processing. Please try again or check your repository content.")
104
  await websocket.close()
105
  return
106
  else:
107
- logger.error(f"ValueError preparing retriever: {str(e)}")
108
  await websocket.send_text(f"Error preparing retriever: {str(e)}")
109
  await websocket.close()
110
  return
111
  except Exception as e:
112
- logger.error(f"Error preparing retriever: {str(e)}")
113
  # Check for specific embedding-related errors
114
  if "All embeddings should be of the same size" in str(e):
115
  await websocket.send_text("Error: Inconsistent embedding sizes detected. Some documents may have failed to embed properly. Please try again.")
@@ -158,7 +158,7 @@ async def handle_websocket_chat(websocket: WebSocket):
158
  # Count research iterations if this is a Deep Research request
159
  if is_deep_research:
160
  research_iteration = sum(1 for msg in request.messages if msg.role == 'assistant') + 1
161
- logger.info(f"Deep Research request detected - iteration {research_iteration}")
162
 
163
  # Check if this is a continuation request
164
  if "continue" in last_message.content.lower() and "research" in last_message.content.lower():
@@ -167,13 +167,13 @@ async def handle_websocket_chat(websocket: WebSocket):
167
  for msg in request.messages:
168
  if msg.role == "user" and "continue" not in msg.content.lower():
169
  original_topic = msg.content.replace("[DEEP RESEARCH]", "").strip()
170
- logger.info(f"Found original research topic: {original_topic}")
171
  break
172
 
173
  if original_topic:
174
  # Replace the continuation message with the original topic
175
  last_message.content = original_topic
176
- logger.info(f"Using original topic for research: {original_topic}")
177
 
178
  # Get the query from the last message
179
  query = last_message.content
@@ -189,7 +189,7 @@ async def handle_websocket_chat(websocket: WebSocket):
189
  if request.filePath:
190
  # Use the file path to get relevant context about the file
191
  rag_query = f"Contexts related to {request.filePath}"
192
- logger.info(f"Modified RAG query to focus on file: {request.filePath}")
193
 
194
  # Try to perform RAG retrieval
195
  try:
@@ -199,7 +199,7 @@ async def handle_websocket_chat(websocket: WebSocket):
199
  if retrieved_documents and retrieved_documents[0].documents:
200
  # Format context for the prompt in a more structured way
201
  documents = retrieved_documents[0].documents
202
- logger.info(f"Retrieved {len(documents)} documents")
203
 
204
  # Group documents by file path
205
  docs_by_file = {}
@@ -222,13 +222,13 @@ async def handle_websocket_chat(websocket: WebSocket):
222
  # Join all parts with clear separation
223
  context_text = "\n\n" + "-" * 10 + "\n\n".join(context_parts)
224
  else:
225
- logger.warning("No documents retrieved from RAG")
226
  except Exception as e:
227
- logger.error(f"Error in RAG retrieval: {str(e)}")
228
  # Continue without RAG if there's an error
229
 
230
  except Exception as e:
231
- logger.error(f"Error retrieving documents: {str(e)}")
232
  context_text = ""
233
 
234
  # Get repository information
@@ -392,9 +392,9 @@ This file contains...
392
  if request.filePath:
393
  try:
394
  file_content = get_file_content(request.repo_url, request.filePath, request.type, request.token)
395
- logger.info(f"Successfully retrieved content for file: {request.filePath}")
396
  except Exception as e:
397
- logger.error(f"Error retrieving file content: {str(e)}")
398
  # Continue without file content if there's an error
399
 
400
  # Format conversation history
@@ -421,7 +421,7 @@ This file contains...
421
  prompt += f"{CONTEXT_START}\n{context_text}\n{CONTEXT_END}\n\n"
422
  else:
423
  # Add a note that we're skipping RAG due to size constraints or because it's the isolated API
424
- logger.info("No context available from RAG")
425
  prompt += "<note>Answering without retrieval augmentation.</note>\n\n"
426
 
427
  prompt += f"<query>\n{query}\n</query>\n\nAssistant: "
@@ -448,11 +448,11 @@ This file contains...
448
  model_type=ModelType.LLM
449
  )
450
  elif request.provider == "openrouter":
451
- logger.info(f"Using OpenRouter with model: {request.model}")
452
 
453
  # Check if OpenRouter API key is set
454
  if not OPENROUTER_API_KEY:
455
- logger.warning("OPENROUTER_API_KEY not configured, but continuing with request")
456
  # We'll let the OpenRouterClient handle this and return a friendly error message
457
 
458
  model = OpenRouterClient()
@@ -471,11 +471,11 @@ This file contains...
471
  model_type=ModelType.LLM
472
  )
473
  elif request.provider == "openai":
474
- logger.info(f"Using Openai protocol with model: {request.model}")
475
 
476
  # Check if an API key is set for Openai
477
  if not OPENAI_API_KEY:
478
- logger.warning("OPENAI_API_KEY not configured, but continuing with request")
479
  # We'll let the OpenAIClient handle this and return an error message
480
 
481
  # Initialize Openai client
@@ -495,7 +495,7 @@ This file contains...
495
  model_type=ModelType.LLM
496
  )
497
  elif request.provider == "azure":
498
- logger.info(f"Using Azure AI with model: {request.model}")
499
 
500
  # Initialize Azure AI client
501
  model = AzureAIClient()
@@ -512,7 +512,7 @@ This file contains...
512
  model_type=ModelType.LLM
513
  )
514
  elif request.provider == "dashscope":
515
- logger.info(f"Using Dashscope with model: {request.model}")
516
 
517
  # Initialize Dashscope client
518
  model = DashscopeClient()
@@ -555,7 +555,7 @@ This file contains...
555
  elif request.provider == "openrouter":
556
  try:
557
  # Get the response and handle it properly using the previously created api_kwargs
558
- logger.info("Making OpenRouter API call")
559
  response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
560
  # Handle streaming response from OpenRouter
561
  async for chunk in response:
@@ -563,7 +563,7 @@ This file contains...
563
  # Explicitly close the WebSocket connection after the response is complete
564
  await websocket.close()
565
  except Exception as e_openrouter:
566
- logger.error(f"Error with OpenRouter API: {str(e_openrouter)}")
567
  error_msg = f"\nError with OpenRouter API: {str(e_openrouter)}\n\nPlease check that you have set the OPENROUTER_API_KEY environment variable with a valid API key."
568
  await websocket.send_text(error_msg)
569
  # Close the WebSocket connection after sending the error message
@@ -571,7 +571,7 @@ This file contains...
571
  elif request.provider == "openai":
572
  try:
573
  # Get the response and handle it properly using the previously created api_kwargs
574
- logger.info("Making Openai API call")
575
  response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
576
  # Handle streaming response from Openai
577
  async for chunk in response:
@@ -585,7 +585,7 @@ This file contains...
585
  # Explicitly close the WebSocket connection after the response is complete
586
  await websocket.close()
587
  except Exception as e_openai:
588
- logger.error(f"Error with Openai API: {str(e_openai)}")
589
  error_msg = f"\nError with Openai API: {str(e_openai)}\n\nPlease check that you have set the OPENAI_API_KEY environment variable with a valid API key."
590
  await websocket.send_text(error_msg)
591
  # Close the WebSocket connection after sending the error message
@@ -593,7 +593,7 @@ This file contains...
593
  elif request.provider == "azure":
594
  try:
595
  # Get the response and handle it properly using the previously created api_kwargs
596
- logger.info("Making Azure AI API call")
597
  response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
598
  # Handle streaming response from Azure AI
599
  async for chunk in response:
@@ -607,7 +607,7 @@ This file contains...
607
  # Explicitly close the WebSocket connection after the response is complete
608
  await websocket.close()
609
  except Exception as e_azure:
610
- logger.error(f"Error with Azure AI API: {str(e_azure)}")
611
  error_msg = f"\nError with Azure AI API: {str(e_azure)}\n\nPlease check that you have set the AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, and AZURE_OPENAI_VERSION environment variables with valid values."
612
  await websocket.send_text(error_msg)
613
  # Close the WebSocket connection after sending the error message
@@ -623,13 +623,13 @@ This file contains...
623
  await websocket.close()
624
 
625
  except Exception as e_outer:
626
- logger.error(f"Error in streaming response: {str(e_outer)}")
627
  error_message = str(e_outer)
628
 
629
  # Check for token limit errors
630
  if "maximum context length" in error_message or "token limit" in error_message or "too many tokens" in error_message:
631
  # If we hit a token limit error, try again without context
632
- logger.warning("Token limit exceeded, retrying without context")
633
  try:
634
  # Create a simplified prompt without context
635
  simplified_prompt = f"/no_think {system_prompt}\n\n"
@@ -672,14 +672,14 @@ This file contains...
672
  )
673
 
674
  # Get the response using the simplified prompt
675
- logger.info("Making fallback OpenRouter API call")
676
  fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)
677
 
678
  # Handle streaming fallback_response from OpenRouter
679
  async for chunk in fallback_response:
680
  await websocket.send_text(chunk)
681
  except Exception as e_fallback:
682
- logger.error(f"Error with OpenRouter API fallback: {str(e_fallback)}")
683
  error_msg = f"\nError with OpenRouter API fallback: {str(e_fallback)}\n\nPlease check that you have set the OPENROUTER_API_KEY environment variable with a valid API key."
684
  await websocket.send_text(error_msg)
685
  elif request.provider == "openai":
@@ -692,7 +692,7 @@ This file contains...
692
  )
693
 
694
  # Get the response using the simplified prompt
695
- logger.info("Making fallback Openai API call")
696
  fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)
697
 
698
  # Handle streaming fallback_response from Openai
@@ -700,7 +700,7 @@ This file contains...
700
  text = chunk if isinstance(chunk, str) else getattr(chunk, 'text', str(chunk))
701
  await websocket.send_text(text)
702
  except Exception as e_fallback:
703
- logger.error(f"Error with Openai API fallback: {str(e_fallback)}")
704
  error_msg = f"\nError with Openai API fallback: {str(e_fallback)}\n\nPlease check that you have set the OPENAI_API_KEY environment variable with a valid API key."
705
  await websocket.send_text(error_msg)
706
  elif request.provider == "azure":
@@ -713,7 +713,7 @@ This file contains...
713
  )
714
 
715
  # Get the response using the simplified prompt
716
- logger.info("Making fallback Azure AI API call")
717
  fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)
718
 
719
  # Handle streaming fallback response from Azure AI
@@ -726,7 +726,7 @@ This file contains...
726
  if text is not None:
727
  await websocket.send_text(text)
728
  except Exception as e_fallback:
729
- logger.error(f"Error with Azure AI API fallback: {str(e_fallback)}")
730
  error_msg = f"\nError with Azure AI API fallback: {str(e_fallback)}\n\nPlease check that you have set the AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, and AZURE_OPENAI_VERSION environment variables with valid values."
731
  await websocket.send_text(error_msg)
732
  else:
@@ -748,7 +748,7 @@ This file contains...
748
  if hasattr(chunk, 'text'):
749
  await websocket.send_text(chunk.text)
750
  except Exception as e2:
751
- logger.error(f"Error in fallback streaming response: {str(e2)}")
752
  await websocket.send_text(f"\nI apologize, but your request is too large for me to process. Please try a shorter query or break it into smaller parts.")
753
  # Close the WebSocket connection after sending the error message
754
  await websocket.close()
@@ -759,9 +759,9 @@ This file contains...
759
  await websocket.close()
760
 
761
  except WebSocketDisconnect:
762
- logger.info("WebSocket disconnected")
763
  except Exception as e:
764
- logger.error(f"Error in WebSocket handler: {str(e)}")
765
  try:
766
  await websocket.send_text(f"Error: {str(e)}")
767
  await websocket.close()
 
67
  last_message = request.messages[-1]
68
  if hasattr(last_message, 'content') and last_message.content:
69
  tokens = count_tokens(last_message.content, request.provider == "ollama")
70
+ # Request size check
71
  if tokens > 8000:
72
+ print(f"Warning: Request exceeds recommended token limit ({tokens} > 7500)")
73
  input_too_large = True
74
 
75
  # Create a new RAG instance for this request
 
84
 
85
  if request.excluded_dirs:
86
  excluded_dirs = [unquote(dir_path) for dir_path in request.excluded_dirs.split('\n') if dir_path.strip()]
87
+ # Using custom excluded directories
88
  if request.excluded_files:
89
  excluded_files = [unquote(file_pattern) for file_pattern in request.excluded_files.split('\n') if file_pattern.strip()]
90
+ # Using custom excluded files
91
  if request.included_dirs:
92
  included_dirs = [unquote(dir_path) for dir_path in request.included_dirs.split('\n') if dir_path.strip()]
93
+ # Using custom included directories
94
  if request.included_files:
95
  included_files = [unquote(file_pattern) for file_pattern in request.included_files.split('\n') if file_pattern.strip()]
96
+ # Using custom included files
97
 
98
  request_rag.prepare_retriever(request.repo_url, request.type, request.token, excluded_dirs, excluded_files, included_dirs, included_files)
99
+ print(f"Retriever prepared for {request.repo_url}")
100
  except ValueError as e:
101
  if "No valid documents with embeddings found" in str(e):
102
+ print(f"Error: No valid embeddings found: {str(e)}")
103
  await websocket.send_text("Error: No valid document embeddings found. This may be due to embedding size inconsistencies or API errors during document processing. Please try again or check your repository content.")
104
  await websocket.close()
105
  return
106
  else:
107
+ print(f"Error: ValueError preparing retriever: {str(e)}")
108
  await websocket.send_text(f"Error preparing retriever: {str(e)}")
109
  await websocket.close()
110
  return
111
  except Exception as e:
112
+ print(f"Error preparing retriever: {str(e)}")
113
  # Check for specific embedding-related errors
114
  if "All embeddings should be of the same size" in str(e):
115
  await websocket.send_text("Error: Inconsistent embedding sizes detected. Some documents may have failed to embed properly. Please try again.")
 
158
  # Count research iterations if this is a Deep Research request
159
  if is_deep_research:
160
  research_iteration = sum(1 for msg in request.messages if msg.role == 'assistant') + 1
161
+ print(f"Deep Research request detected - iteration {research_iteration}")
162
 
163
  # Check if this is a continuation request
164
  if "continue" in last_message.content.lower() and "research" in last_message.content.lower():
 
167
  for msg in request.messages:
168
  if msg.role == "user" and "continue" not in msg.content.lower():
169
  original_topic = msg.content.replace("[DEEP RESEARCH]", "").strip()
170
+ # Found original research topic
171
  break
172
 
173
  if original_topic:
174
  # Replace the continuation message with the original topic
175
  last_message.content = original_topic
176
+ # Using original topic for research
177
 
178
  # Get the query from the last message
179
  query = last_message.content
 
189
  if request.filePath:
190
  # Use the file path to get relevant context about the file
191
  rag_query = f"Contexts related to {request.filePath}"
192
+ # Modified RAG query to focus on file
193
 
194
  # Try to perform RAG retrieval
195
  try:
 
199
  if retrieved_documents and retrieved_documents[0].documents:
200
  # Format context for the prompt in a more structured way
201
  documents = retrieved_documents[0].documents
202
+ # Retrieved documents
203
 
204
  # Group documents by file path
205
  docs_by_file = {}
 
222
  # Join all parts with clear separation
223
  context_text = "\n\n" + "-" * 10 + "\n\n".join(context_parts)
224
  else:
225
+ print("Warning: No documents retrieved from RAG")
226
  except Exception as e:
227
+ print(f"Error in RAG retrieval: {str(e)}")
228
  # Continue without RAG if there's an error
229
 
230
  except Exception as e:
231
+ print(f"Error retrieving documents: {str(e)}")
232
  context_text = ""
233
 
234
  # Get repository information
 
392
  if request.filePath:
393
  try:
394
  file_content = get_file_content(request.repo_url, request.filePath, request.type, request.token)
395
+ # Successfully retrieved content for file
396
  except Exception as e:
397
+ print(f"Error retrieving file content: {str(e)}")
398
  # Continue without file content if there's an error
399
 
400
  # Format conversation history
 
421
  prompt += f"{CONTEXT_START}\n{context_text}\n{CONTEXT_END}\n\n"
422
  else:
423
  # Add a note that we're skipping RAG due to size constraints or because it's the isolated API
424
+ # No context available from RAG
425
  prompt += "<note>Answering without retrieval augmentation.</note>\n\n"
426
 
427
  prompt += f"<query>\n{query}\n</query>\n\nAssistant: "
 
448
  model_type=ModelType.LLM
449
  )
450
  elif request.provider == "openrouter":
451
+ # Using OpenRouter
452
 
453
  # Check if OpenRouter API key is set
454
  if not OPENROUTER_API_KEY:
455
+ print("Warning: OPENROUTER_API_KEY not configured")
456
  # We'll let the OpenRouterClient handle this and return a friendly error message
457
 
458
  model = OpenRouterClient()
 
471
  model_type=ModelType.LLM
472
  )
473
  elif request.provider == "openai":
474
+ # Using OpenAI
475
 
476
  # Check if an API key is set for Openai
477
  if not OPENAI_API_KEY:
478
+ print("Warning: OPENAI_API_KEY not configured")
479
  # We'll let the OpenAIClient handle this and return an error message
480
 
481
  # Initialize Openai client
 
495
  model_type=ModelType.LLM
496
  )
497
  elif request.provider == "azure":
498
+ # Using Azure AI
499
 
500
  # Initialize Azure AI client
501
  model = AzureAIClient()
 
512
  model_type=ModelType.LLM
513
  )
514
  elif request.provider == "dashscope":
515
+ # Using Dashscope
516
 
517
  # Initialize Dashscope client
518
  model = DashscopeClient()
 
555
  elif request.provider == "openrouter":
556
  try:
557
  # Get the response and handle it properly using the previously created api_kwargs
558
+ # Making OpenRouter API call
559
  response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
560
  # Handle streaming response from OpenRouter
561
  async for chunk in response:
 
563
  # Explicitly close the WebSocket connection after the response is complete
564
  await websocket.close()
565
  except Exception as e_openrouter:
566
+ print(f"Error with OpenRouter API: {str(e_openrouter)}")
567
  error_msg = f"\nError with OpenRouter API: {str(e_openrouter)}\n\nPlease check that you have set the OPENROUTER_API_KEY environment variable with a valid API key."
568
  await websocket.send_text(error_msg)
569
  # Close the WebSocket connection after sending the error message
 
571
  elif request.provider == "openai":
572
  try:
573
  # Get the response and handle it properly using the previously created api_kwargs
574
+ # Making OpenAI API call
575
  response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
576
  # Handle streaming response from Openai
577
  async for chunk in response:
 
585
  # Explicitly close the WebSocket connection after the response is complete
586
  await websocket.close()
587
  except Exception as e_openai:
588
+ print(f"Error with OpenAI API: {str(e_openai)}")
589
  error_msg = f"\nError with Openai API: {str(e_openai)}\n\nPlease check that you have set the OPENAI_API_KEY environment variable with a valid API key."
590
  await websocket.send_text(error_msg)
591
  # Close the WebSocket connection after sending the error message
 
593
  elif request.provider == "azure":
594
  try:
595
  # Get the response and handle it properly using the previously created api_kwargs
596
+ # Making Azure AI API call
597
  response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
598
  # Handle streaming response from Azure AI
599
  async for chunk in response:
 
607
  # Explicitly close the WebSocket connection after the response is complete
608
  await websocket.close()
609
  except Exception as e_azure:
610
+ print(f"Error with Azure AI API: {str(e_azure)}")
611
  error_msg = f"\nError with Azure AI API: {str(e_azure)}\n\nPlease check that you have set the AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, and AZURE_OPENAI_VERSION environment variables with valid values."
612
  await websocket.send_text(error_msg)
613
  # Close the WebSocket connection after sending the error message
 
623
  await websocket.close()
624
 
625
  except Exception as e_outer:
626
+ print(f"Error in streaming response: {str(e_outer)}")
627
  error_message = str(e_outer)
628
 
629
  # Check for token limit errors
630
  if "maximum context length" in error_message or "token limit" in error_message or "too many tokens" in error_message:
631
  # If we hit a token limit error, try again without context
632
+ print("Warning: Token limit exceeded, retrying without context")
633
  try:
634
  # Create a simplified prompt without context
635
  simplified_prompt = f"/no_think {system_prompt}\n\n"
 
672
  )
673
 
674
  # Get the response using the simplified prompt
675
+ # Making fallback OpenRouter API call
676
  fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)
677
 
678
  # Handle streaming fallback_response from OpenRouter
679
  async for chunk in fallback_response:
680
  await websocket.send_text(chunk)
681
  except Exception as e_fallback:
682
+ print(f"Error with OpenRouter API fallback: {str(e_fallback)}")
683
  error_msg = f"\nError with OpenRouter API fallback: {str(e_fallback)}\n\nPlease check that you have set the OPENROUTER_API_KEY environment variable with a valid API key."
684
  await websocket.send_text(error_msg)
685
  elif request.provider == "openai":
 
692
  )
693
 
694
  # Get the response using the simplified prompt
695
+ # Making fallback OpenAI API call
696
  fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)
697
 
698
  # Handle streaming fallback_response from Openai
 
700
  text = chunk if isinstance(chunk, str) else getattr(chunk, 'text', str(chunk))
701
  await websocket.send_text(text)
702
  except Exception as e_fallback:
703
+ print(f"Error with OpenAI API fallback: {str(e_fallback)}")
704
  error_msg = f"\nError with Openai API fallback: {str(e_fallback)}\n\nPlease check that you have set the OPENAI_API_KEY environment variable with a valid API key."
705
  await websocket.send_text(error_msg)
706
  elif request.provider == "azure":
 
713
  )
714
 
715
  # Get the response using the simplified prompt
716
+ # Making fallback Azure AI API call
717
  fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)
718
 
719
  # Handle streaming fallback response from Azure AI
 
726
  if text is not None:
727
  await websocket.send_text(text)
728
  except Exception as e_fallback:
729
+ print(f"Error with Azure AI API fallback: {str(e_fallback)}")
730
  error_msg = f"\nError with Azure AI API fallback: {str(e_fallback)}\n\nPlease check that you have set the AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, and AZURE_OPENAI_VERSION environment variables with valid values."
731
  await websocket.send_text(error_msg)
732
  else:
 
748
  if hasattr(chunk, 'text'):
749
  await websocket.send_text(chunk.text)
750
  except Exception as e2:
751
+ print(f"Error in fallback streaming response: {str(e2)}")
752
  await websocket.send_text(f"\nI apologize, but your request is too large for me to process. Please try a shorter query or break it into smaller parts.")
753
  # Close the WebSocket connection after sending the error message
754
  await websocket.close()
 
759
  await websocket.close()
760
 
761
  except WebSocketDisconnect:
762
+ # WebSocket disconnected
763
  except Exception as e:
764
+ print(f"Error in WebSocket handler: {str(e)}")
765
  try:
766
  await websocket.send_text(f"Error: {str(e)}")
767
  await websocket.close()