# Gemini
# feat: add detailed logging
# 01d5a5d
from collections import deque
from datetime import datetime
from typing import List, Dict, Any
import json
import numpy as np
from lpm_kernel.L1.bio import Cluster
import logging
# strftime pattern for timestamps (e.g. "2024-01-31 13:45:00"); used by get_cur_time().
TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
def get_cur_time() -> str:
    """
    Format the current time as a string.

    Reads ``datetime.now()`` (naive local time, no timezone attached) and
    renders it with the module-level ``TIME_FORMAT`` pattern.

    Returns:
        str: The current timestamp rendered according to ``TIME_FORMAT``.
    """
    return datetime.now().strftime(TIME_FORMAT)
def find_connected_components(
    cluster_list: List[Cluster], cluster_merge_distance: float
) -> List[List[Cluster]]:
    """
    Partition clusters into connected components of a distance-threshold graph.

    Two clusters are treated as directly linked when ``np.linalg.norm`` of the
    difference of their ``cluster_center`` values is strictly less than
    ``cluster_merge_distance``. Components are the transitive closure of that
    relation, discovered with a breadth-first search.

    Args:
        cluster_list: Cluster objects to group; each must expose a
            ``cluster_center`` that supports subtraction.
        cluster_merge_distance: Exclusive upper bound on the centre-to-centre
            distance for two clusters to be linked.

    Returns:
        List[List[Cluster]]: One list per component. Components appear in the
        order of their lowest-index member, and members within a component
        appear in breadth-first visiting order.
    """
    total = len(cluster_list)
    centers = [cluster.cluster_center for cluster in cluster_list]

    # Full pairwise distance table; row i / column j is the distance between
    # the centres of clusters i and j.
    distances = np.array(
        [[np.linalg.norm(origin - target) for target in centers] for origin in centers]
    )

    seen = set()
    groups = []
    for seed in range(total):
        if seed in seen:
            continue

        # Breadth-first expansion from the first unvisited cluster.
        seen.add(seed)
        pending = deque([seed])
        members = []
        while pending:
            current = pending.popleft()
            members.append(cluster_list[current])
            for other in range(total):
                if other in seen:
                    continue
                if distances[current, other] < cluster_merge_distance:
                    # Mark on enqueue so a cluster is never queued twice.
                    seen.add(other)
                    pending.append(other)
        groups.append(members)

    return groups
def is_valid_note(note: Dict[str, Any]) -> bool:
    """
    Report whether a note carries a usable creation time.

    Args:
        note: Dictionary containing note data.

    Returns:
        bool: True when the ``"createTime"`` key is present and its value is
        truthy; False when the key is missing or the value is empty/falsy.
    """
    # A missing key yields None, which is falsy just like an empty value.
    return bool(note.get("createTime"))
def is_valid_todo(todo: Dict[str, Any]) -> bool:
    """
    Report whether a todo item carries a usable creation time.

    Args:
        todo: Dictionary containing todo data.

    Returns:
        bool: True when ``"createTime"`` exists in the todo and is truthy,
        False otherwise.
    """
    created_at = todo.get("createTime")
    if not created_at:
        # Covers both an absent key and an empty/None/zero value.
        return False
    return True
def is_valid_chat(chat: Dict[str, Any]) -> bool:
    """
    Report whether a chat carries both a creation time and a summary.

    Args:
        chat: Dictionary containing chat data.

    Returns:
        bool: True only when both ``"createTime"`` and ``"summary"`` are
        present with truthy values; False if either is missing or empty.
    """
    required_fields = ("createTime", "summary")
    # all() short-circuits on the first missing or falsy field.
    return all(chat.get(field) for field in required_fields)
def save_true_topics(true_topics_res: Dict[str, Dict], topics_path: str) -> None:
    """
    Write topic clustering results to a JSON file, leaving embeddings out.

    Only a fixed set of fields is copied from each cluster entry; anything
    else in the entry (such as embedding data) is not written.

    Args:
        true_topics_res: Mapping of cluster id to that cluster's result dict.
            Every entry must contain all of the exported fields, otherwise a
            ``KeyError`` is raised before the file is opened.
        topics_path: Destination path of the JSON file (overwritten if it
            already exists).
    """
    # Fields persisted for each cluster, in the order they appear in the file.
    exported_fields = (
        "indices",
        "docIds",
        "contents",
        "chunkIds",
        "tags",
        "topic",
        "topicId",
        "recTimes",
    )

    # Build fresh per-cluster dicts so the caller's data is left untouched.
    serializable = {
        cluster_id: {field: cluster_data[field] for field in exported_fields}
        for cluster_id, cluster_data in true_topics_res.items()
    }

    with open(topics_path, "w", encoding="utf-8") as out_file:
        json.dump(serializable, out_file, ensure_ascii=False, indent=4)