Safetensors
English
llava
video-retrieval
text-to-video-search
multimodal-embedding
File size: 4,800 Bytes
7daf628
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
"""
Utilities for input-output loading/saving.
"""

from typing import Any, List
import yaml
import pickle
import json
import pandas as pd


class PrettySafeLoader(yaml.SafeLoader):
    """`yaml.SafeLoader` subclass that also knows how to build Python tuples."""

    def construct_python_tuple(self, node):
        """Turn the given YAML sequence node into a tuple of its items."""
        items = self.construct_sequence(node)
        return tuple(items)


# Register the tuple constructor for the python/tuple tag on this loader class.
PrettySafeLoader.add_constructor(
    u'tag:yaml.org,2002:python/tuple',
    PrettySafeLoader.construct_python_tuple,
)


def load_yml(path: str, loader_type: str = 'default'):
    """Parse a YAML file and return its contents.

    Args:
        path (str): location of the .yml file to read
        loader_type (str, optional): which loader to parse with. 'default'
            selects `yaml.Loader`, 'safe' selects `PrettySafeLoader`.
            Defaults to 'default'.

    Returns:
        Any: whatever `yaml.load` produces for the file (typically a dict)
    """
    assert loader_type in ('default', 'safe')

    # NOTE(security): `yaml.Loader` is the full (unsafe) PyYAML loader; pass
    # loader_type='safe' when the file does not come from a trusted source.
    if loader_type == "default":
        loader = yaml.Loader
    else:
        loader = PrettySafeLoader

    with open(path, 'r') as stream:
        return yaml.load(stream, Loader=loader)


def save_yml(data: dict, path: str):
    """Dump `data` as block-style YAML into the file at `path`.

    Args:
        data (dict): object to serialize
        path (str): destination .yml file (opened in write mode)
    """
    with open(path, 'w') as stream:
        # default_flow_style=False is forwarded to yaml.dump unchanged.
        yaml.dump(data, stream, default_flow_style=False)


def load_pkl(path: str, encoding: str = "ascii") -> Any:
    """Loads a .pkl file.

    Args:
        path (str): path to the .pkl file
        encoding (str, optional): encoding passed to `pickle.load` (used when
            decoding 8-bit strings pickled by Python 2). Defaults to "ascii".

    Returns:
        Any: unpickled object
    """
    # NOTE(security): unpickling can execute arbitrary code; only load files
    # that come from a trusted source.
    # The context manager closes the file handle even if unpickling raises.
    with open(path, "rb") as f:
        return pickle.load(f, encoding=encoding)


def save_pkl(data: Any, path: str) -> None:
    """Pickle `data` into the file at `path`.

    Args:
        data (Any): object to serialize
        path (str): destination file, opened in binary write mode
    """
    with open(path, 'wb') as sink:
        pickle.Pickler(sink).dump(data)


def load_json(path: str) -> dict:
    """Read the file at `path` in binary mode and return the decoded JSON."""
    with open(path, 'rb') as stream:
        raw = stream.read()
    return json.loads(raw)


def save_json(data: dict, path: str):
    """Serialize `data` as JSON (2-space indent) into the file at `path`."""
    with open(path, 'w') as sink:
        json.dump(obj=data, fp=sink, indent=2)


def load_txt(path: str):
    """Read a text file and split it into lines.

    Args:
        path (str): location of the .txt file

    Returns:
        List: the file's lines, without their line terminators
    """
    with open(path) as handle:
        content = handle.read()
    return content.splitlines()


def save_txt(data: List[str], path: str) -> None:
    """Writes data (lines) to a txt file.

    The lines are joined with "\\n"; no trailing newline is appended.

    Args:
        data (List[str]): lines to write, one string per line
        path (str): path to .txt file

    Raises:
        AssertionError: if `data` is not a list
    """
    # Checked before the file is opened so a bad argument never truncates
    # an existing file.
    assert isinstance(data, list), "data must be a list of strings"

    with open(path, "w") as f:
        f.write("\n".join(data))


def read_spreadsheet(sheet_id, gid, url=None, drop_na=True, **kwargs):
    """Read one Google Sheets tab into a DataFrame through its CSV export.

    NOTE(review): a second function with the same name is defined further
    down in this file; because it comes later it rebinds the name, so
    importers of this module get that one rather than this one.

    Args:
        sheet_id (str): spreadsheet id, concatenated into the export URL
        gid: id of the tab, interpolated into the export URL
        url (str, optional): if given, read from here instead of building
            the export URL. Defaults to None.
        drop_na (bool, optional): drop every row containing at least one
            NaN value. Defaults to True.
        **kwargs: forwarded to `pd.read_csv`

    Returns:
        pd.DataFrame: the loaded (and optionally NaN-filtered) table
    """
    if url is None:
        url = (
            'https://docs.google.com/spreadsheets/d/'
            + sheet_id
            + f'/export?gid={gid}&format=csv'
        )

    frame = pd.read_csv(url, **kwargs)
    return frame.dropna(axis=0) if drop_na else frame


def load_midi(file, rate=16000):
    """Load a .mid file with `pretty_midi` and synthesize it.

    Args:
        file (str): path to the MIDI file; must end with '.mid'
        rate (int, optional): passed to `synthesize` as `fs`. Defaults to 16000.

    Returns:
        tuple: (result of `PrettyMIDI.synthesize`, rate)
    """
    # Imported here so the module itself does not require pretty_midi.
    import pretty_midi
    assert file.endswith('.mid')
    waveform = pretty_midi.PrettyMIDI(file).synthesize(fs=rate)
    return waveform, rate


def load_ptz(path):
    """Open the gzip-compressed file at `path` and return what `torch.load` reads from it.

    Args:
        path (str): location of the gzip-compressed file

    Returns:
        Any: the object produced by `torch.load`
    """
    # Both imports are local so they are only needed when this is called.
    import torch
    import gzip
    with gzip.open(path, 'rb') as stream:
        return torch.load(stream)


def save_video(frames, path, fps=30):
    """Write `frames` to `path` with `imageio.mimwrite`.

    Args:
        frames: sequence of frames handed to imageio as-is
        path (str): output file location
        fps (int, optional): forwarded to imageio as `fps`. Defaults to 30.
    """
    # Local import: imageio is only needed when a video is written.
    from imageio import mimwrite
    mimwrite(path, frames, fps=fps)


def read_spreadsheet(sheet_id, gid, gid_key="granularity", url=None, drop_na=False, **kwargs):
    """Read one Google Sheets tab into a DataFrame through its CSV export.

    This is the definition that is in effect for the module, since it is the
    last one bound to the name. It accepts `url` and `drop_na` as explicit
    options so they are handled here instead of being forwarded to
    `pd.read_csv`, which rejects them as unknown keyword arguments.

    Args:
        sheet_id (str): spreadsheet id used to build the export URL
        gid: id of the tab to export
        gid_key (str, optional): not used by this function; kept so existing
            call sites that pass it keep working. Defaults to "granularity".
        url (str, optional): if given, read from here instead of building
            the export URL from `sheet_id` and `gid`. Defaults to None.
        drop_na (bool, optional): drop every row containing at least one
            NaN value. Defaults to False, i.e. rows are returned untouched.
        **kwargs: forwarded to `pd.read_csv`

    Returns:
        pd.DataFrame: the loaded table
    """
    if url is None:
        BASE_URL = 'https://docs.google.com/spreadsheets/d/'
        url = f'{BASE_URL}{sheet_id}/export?gid={gid}&format=csv'

    df = pd.read_csv(url, **kwargs)

    if drop_na:
        df = df.dropna(axis=0)

    return df


def load_jsonl(file_path: str) -> list:
    """Parse a JSONL file into a list of decoded objects.

    Lines that are empty or contain only whitespace are ignored.

    Args:
        file_path (str): location of the JSONL file (read as UTF-8)

    Returns:
        list: one decoded JSON value per non-blank line, in file order

    Example:
        >>> records = load_jsonl("path/to/file.jsonl")
        >>> print(records[0])  # first JSON object in the file
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        return [json.loads(row) for row in handle if row.strip()]


def save_jsonl(data: list, file_path: str) -> None:
    """Write each item of `data` as one JSON document per line.

    Args:
        data (list): items to serialize, written in order
        file_path (str): destination JSONL file (written as UTF-8)

    Example:
        >>> rows = [{"text": "hello"}, {"text": "world"}]
        >>> save_jsonl(rows, "output.jsonl")
    """
    with open(file_path, 'w', encoding='utf-8') as sink:
        # Generator keeps serialization lazy: one record is encoded at a time.
        sink.writelines(json.dumps(record) + '\n' for record in data)