Spaces:

harvesthealth
/

secondme-api

Sleeping

File size: 4,336 Bytes

01d5a5d

from collections import deque
from datetime import datetime
from typing import List, Dict, Any
import json

import numpy as np

from lpm_kernel.L1.bio import Cluster
import logging


TIME_FORMAT = "%Y-%m-%d %H:%M:%S"


def get_cur_time() -> str:
    """
    Returns the current time formatted as a string.
    
    Returns:
        str: Current time formatted according to TIME_FORMAT.
    """
    cur_time = datetime.now().strftime(TIME_FORMAT)
    return cur_time


def find_connected_components(
    cluster_list: List[Cluster], cluster_merge_distance: float
) -> List[List[Cluster]]:
    """
    Finds connected components in a list of clusters based on a distance threshold.
    
    Args:
        cluster_list: List of Cluster objects to analyze.
        cluster_merge_distance: Maximum distance for clusters to be considered connected.
        
    Returns:
        List[List[Cluster]]: List of connected components, where each component is a list of clusters.
    """
    adjacency_matrix = np.array(
        [
            [
                np.linalg.norm(cluster1.cluster_center - cluster2.cluster_center)
                for cluster2 in cluster_list
            ]
            for cluster1 in cluster_list
        ]
    )

    cluster_n = len(cluster_list)
    visited = [False] * cluster_n
    components = []

    def bfs(start: int):
        queue = deque([start])
        component = []
        visited[start] = True

        while queue:
            node = queue.popleft()
            component.append(node)
            for neighbor in range(cluster_n):
                if (
                    not visited[neighbor]
                    and adjacency_matrix[node, neighbor] < cluster_merge_distance
                ):
                    visited[neighbor] = True
                    queue.append(neighbor)
        return component

    for i in range(cluster_n):
        if not visited[i]:
            components.append(bfs(i))

    return [[cluster_list[i] for i in component] for component in components]


def is_valid_note(note: Dict[str, Any]) -> bool:
    """
    Checks if a note contains valid creation time information.
    
    Args:
        note: Dictionary containing note data.
        
    Returns:
        bool: True if the note has a valid creation time, False otherwise.
    """
    if "createTime" in note and note["createTime"]:
        return True
    return False


def is_valid_todo(todo: Dict[str, Any]) -> bool:
    """
    Checks if a todo item contains valid creation time information.
    
    Args:
        todo: Dictionary containing todo data.
        
    Returns:
        bool: True if the todo has a valid creation time, False otherwise.
    """
    if "createTime" in todo and todo["createTime"]:
        return True
    return False


def is_valid_chat(chat: Dict[str, Any]) -> bool:
    """
    Checks if a chat contains valid creation time and summary information.
    
    Args:
        chat: Dictionary containing chat data.
        
    Returns:
        bool: True if the chat has valid creation time and summary, False otherwise.
    """
    if (
        "createTime" in chat
        and chat["createTime"]
        and "summary" in chat
        and chat["summary"]
    ):
        return True
    return False


def save_true_topics(true_topics_res: Dict[str, Dict], topics_path: str) -> None:
    """
    Save topics clustering results to a JSON file, excluding embedding data.

    Args:
        true_topics_res: Dictionary containing topic clustering results.
        topics_path: Path to save the JSON file.
    """
    # Create a copy to avoid modifying original
    topics_to_save = {}

    for cluster_id, cluster_data in true_topics_res.items():
        # Create new cluster dict without embeddings
        topics_to_save[cluster_id] = {
            "indices": cluster_data["indices"],
            "docIds": cluster_data["docIds"],
            "contents": cluster_data["contents"],
            "chunkIds": cluster_data["chunkIds"],
            "tags": cluster_data["tags"],
            "topic": cluster_data["topic"],
            "topicId": cluster_data["topicId"],
            "recTimes": cluster_data["recTimes"],
        }

    # Save to JSON file
    with open(topics_path, "w", encoding="utf-8") as f:
        json.dump(topics_to_save, f, ensure_ascii=False, indent=4)