pszemraj commited on
Commit
3577ef9
·
verified ·
0 Parent(s):

Super-squash branch 'main' using huggingface_hub

Browse files
Files changed (10) hide show
  1. .gitattributes +35 -0
  2. .gitignore +160 -0
  3. README.md +46 -0
  4. app.py +234 -0
  5. constants.py +4 -0
  6. requirements.txt +7 -0
  7. settings.py +16 -0
  8. static/loading-icon.svg +4 -0
  9. static/styles.css +78 -0
  10. utils.py +55 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
README.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: BeeCoder Demo
3
+ emoji: 🐝
4
+ colorFrom: gray
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 3.28.3
8
+ app_file: app.py
9
+ pinned: true
10
+ license: apache-2.0
11
+ ---
12
+
13
+ # 🐝BeeCoder Demo🐝
14
+
15
+ ## Code-Completion Playground 💻 with 🐝[BeeCoder](https://huggingface.co/BEE-spoke-data/smol_llama-101M-GQA-python) Models
16
+
17
+ This is a demo playground for generating Python code with the power of 🐝[BeeCoder](https://huggingface.co/BEE-spoke-data/smol_llama-101M-GQA-python), a **fine-tuned** version of the tiny [101M base model](https://huggingface.co/BEE-spoke-data/smol_llama-101M-GQA) on a dataset of pypi packages.
18
+
19
+ ℹ️ This is not an instruction model but just a code completion tool.
20
+
21
+ ---
22
+
23
+ **Intended Use**: This app and its [supporting model](https://huggingface.co/BEE-spoke-data/smol_llama-101M-GQA-python) are provided for demonstration purposes only; not to serve as a replacement for human expertise. For more details on the model, please refer to the [model card](https://huggingface.co/BEE-spoke-data/smol_llama-101M-GQA-python).
24
+
25
+ In our country, we say _"Letting a 100M-parameter model generate a Python script without validating it is like letting a monkey fly a plane."_ So please be careful with the generated code.
26
+
27
+ ---
28
+
29
+ ## Base Model Information
30
+
31
+ The base model, smol_llama-101M-GQA, has been pre-trained on a relatively small number of high quality tokens (less than ~20B). It has impressive performance despite its compact size of 101M parameters. Training data for this base model included:
32
+
33
+ - [JeanKaddour/minipile](https://huggingface.co/datasets/JeanKaddour/minipile)
34
+ - [pszemraj/simple_wikipedia_LM](https://huggingface.co/datasets/pszemraj/simple_wikipedia_LM)
35
+ - [BEE-spoke-data/wikipedia-20230901.en-deduped](https://huggingface.co/datasets/BEE-spoke-data/wikipedia-20230901.en-deduped)
36
+ - [mattymchen/refinedweb-3m](https://huggingface.co/datasets/mattymchen/refinedweb-3m)
37
+
38
+ You can find more information about the base model [here](https://huggingface.co/BEE-spoke-data/smol_llama-101M-GQA).
39
+
40
+ ---
41
+
42
+ ### Credits
43
+
44
+ This app is modified from a demo playground originally built for [StarCoder](https://huggingface.co/bigcode/starcoder) by [BigCode](https://huggingface.co/bigcode). You can find the original demo [here](https://huggingface.co/spaces/bigcode/bigcode-playground).
45
+
46
+ ---
app.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import torch
from gradio.themes.utils import sizes
from transformers import AutoModelForCausalLM, AutoTokenizer

import utils
from constants import END_OF_TEXT, MIN_TEMPERATURE

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(
    "BEE-spoke-data/smol_llama-101M-GQA-python",
    use_fast=False,
)
# The checkpoint defines no pad token; reuse the EOS id and the EOS string
# (END_OF_TEXT) so utilities that pad inputs do not fail.
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.pad_token = END_OF_TEXT
model = AutoModelForCausalLM.from_pretrained(
    "BEE-spoke-data/smol_llama-101M-GQA-python",
    device_map="auto",
)
# One-time compilation cost in exchange for faster repeated generate() calls.
model = torch.compile(model, mode="reduce-overhead")

# UI things

# Stylesheet loaded from the static/ directory (settings.STATIC_PATH default).
_styles = utils.get_file_as_string("styles.css")

# Loads ./README.md file & splits it into sections on "---" delimiters.
readme_file_content = utils.get_file_as_string("README.md", path="./")
(
    manifest,  # YAML front matter (HF Space config)
    description,  # title and intro
    disclaimer,  # intended-use notice
    base_model_info,  # base-model details
    formats,  # credits
) = utils.get_sections(readme_file_content, "---", up_to=5)

theme = gr.themes.Soft(
    primary_hue="yellow",
    secondary_hue="orange",
    neutral_hue="slate",
    radius_size=sizes.radius_sm,
    font=[
        gr.themes.GoogleFont("IBM Plex Sans", [400, 600]),
        "ui-sans-serif",
        "system-ui",
        "sans-serif",
    ],
    text_size=sizes.text_lg,
)
49
+
50
+
51
def run_inference(
    prompt, temperature, max_new_tokens, top_p, repetition_penalty
) -> str:
    """Generate a code completion for ``prompt`` with the loaded model.

    Args:
        prompt: Input code to complete.
        temperature: Sampling temperature; clamped up to MIN_TEMPERATURE.
        max_new_tokens: Upper bound on the number of generated tokens.
        top_p: Nucleus-sampling probability mass.
        repetition_penalty: Penalty factor applied to repeated tokens.

    Returns:
        str: The decoded completion (prompt included), special tokens removed.
    """
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Never hand generate() a temperature of exactly 0 (division by zero).
    safe_temperature = max(temperature, MIN_TEMPERATURE)
    generated = model.generate(
        **encoded,
        do_sample=True,
        epsilon_cutoff=1e-3,
        max_new_tokens=max_new_tokens,
        min_new_tokens=2,
        no_repeat_ngram_size=6,
        renormalize_logits=True,
        repetition_penalty=repetition_penalty,
        temperature=safe_temperature,
        top_p=top_p,
    )
    decoded = tokenizer.batch_decode(
        generated,
        skip_special_tokens=True,
    )
    return decoded[0]
72
+
73
+
74
# Example prompts shown in the demo. Each row holds
# [prompt, temperature, max_new_tokens, top_p, repetition_penalty].
examples = [
    [
        'def greet(name: str) -> None:\n """\n Greets the user\n """\n print(f"Hello,',
        0.2,
        64,
        0.9,
        1.2,
    ],
    [
        'for i in range(5):\n """\n Loop through 0 to 4\n """\n print(i,',
        0.2,
        64,
        0.9,
        1.2,
    ],
    ['x = 10\n"""Check if x is greater than 5"""\nif x > 5:', 0.2, 64, 0.9, 1.2],
    ["def square(x: int) -> int:\n return", 0.2, 64, 0.9, 1.2],
    ['import math\n"""Math operations"""\nmath.', 0.2, 64, 0.9, 1.2],
    [
        'def is_even(n) -> bool:\n """\n Check if a number is even\n """\n if n % 2 == 0:',
        0.2,
        64,
        0.9,
        1.2,
    ],
    [
        'while True:\n """Infinite loop example"""\n print("Infinite loop,',
        0.2,
        64,
        0.9,
        1.2,
    ],
    [
        "def sum_list(lst: list[int]) -> int:\n total = 0\n for item in lst:",
        0.2,
        64,
        0.9,
        1.2,
    ],
    [
        'try:\n """\n Exception handling\n """\n x = int(input("Enter a number: "))\nexcept ValueError:',
        0.2,
        64,
        0.9,
        1.2,
    ],
    [
        'def divide(a: float, b: float) -> float:\n """\n Divide a by b\n """\n if b != 0:',
        0.2,
        64,
        0.9,
        1.2,
    ],
]
128
+
129
+
130
# Define the Gradio Blocks interface
with gr.Blocks(theme=theme, analytics_enabled=False, css=_styles) as demo:
    with gr.Column():
        gr.Markdown(description)
        with gr.Row():
            with gr.Column():
                instruction = gr.Textbox(
                    value=examples[0][0],
                    placeholder="Enter your code here",
                    label="Code",
                    elem_id="q-input",
                )
                submit = gr.Button("Generate", variant="primary")
                output = gr.Code(elem_id="q-output", language="python", lines=10)
        with gr.Row():
            with gr.Column():
                with gr.Accordion("Advanced settings", open=False):
                    with gr.Row():
                        column_1, column_2 = gr.Column(), gr.Column()
                        with column_1:
                            temperature = gr.Slider(
                                label="Temperature",
                                value=0.2,
                                minimum=0.0,
                                maximum=1.0,
                                step=0.05,
                                interactive=True,
                                info="Higher values produce more diverse outputs",
                            )
                            max_new_tokens = gr.Slider(
                                label="Max new tokens",
                                value=64,
                                minimum=32,
                                maximum=512,
                                step=32,
                                interactive=True,
                                info="Number of tokens to generate",
                            )
                        with column_2:
                            top_p = gr.Slider(
                                label="Top-p (nucleus sampling)",
                                value=0.90,
                                minimum=0.0,
                                maximum=1,
                                step=0.05,
                                interactive=True,
                                info="Higher values sample more low-probability tokens",
                            )
                            repetition_penalty = gr.Slider(
                                label="Repetition penalty",
                                value=1.2,
                                minimum=1.0,
                                maximum=2.0,
                                step=0.05,
                                interactive=True,
                                info="Penalize repeated tokens",
                            )
            with gr.Column():
                # Model selector; only one fine-tuned checkpoint is offered.
                version = gr.Dropdown(
                    [
                        "smol_llama-101M-GQA-python",
                    ],
                    value="smol_llama-101M-GQA-python",
                    label="Version",
                    info="",
                )
        gr.Markdown(disclaimer)
        gr.Examples(
            examples=examples,
            # FIX: `version` was previously listed as a sixth input, but each
            # row of `examples` carries only five values and run_inference
            # takes five arguments; the inputs list must match both.
            inputs=[
                instruction,
                temperature,
                max_new_tokens,
                top_p,
                repetition_penalty,
            ],
            cache_examples=False,
            fn=run_inference,
            outputs=[output],
        )
        gr.Markdown(base_model_info)
        gr.Markdown(formats)

    # Wire the Generate button to the inference function.
    submit.click(
        run_inference,
        inputs=[
            instruction,
            temperature,
            max_new_tokens,
            top_p,
            repetition_penalty,
        ],
        outputs=[output],
        show_progress=True,
    )

demo.launch(
    debug=True,
    show_api=False,
    # Only create a public share link when running inside Google Colab.
    share=utils.is_google_colab(),
)
constants.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# End-of-text special token string for the smol_llama tokenizer; app.py also
# assigns it as the tokenizer's pad token.
END_OF_TEXT = "<|endoftext|>"

# Near zero temperature to avoid division by zero
MIN_TEMPERATURE = 1e-4
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Gradio
2
+ gradio==3.28.3
3
+
4
+ # HuggingFace
5
+ accelerate
6
+ sentencepiece
7
+ transformers==4.33.3
settings.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# URLs for the StarCoder Models/APIs
# NOTE(review): the StarCoder endpoints, FIM_INDICATOR, and DEFAULT_SETTINGS
# below appear to be leftovers from the BigCode playground this app was
# adapted from (see README credits); nothing in the visible code reads them
# except STATIC_PATH — confirm before removing.
DEFAULT_HUGGINGFACE_MODELS_API_BASE_URL = "https://api-inference.huggingface.co/models/"
DEFAULT_STARCODER_API_PATH = "bigcode/starcoder/"
DEFAULT_STARCODER_BASE_API_PATH = "bigcode/starcoderbase/"
# Fill-in-the-middle placeholder token used by StarCoder-style prompts.
FIM_INDICATOR = "<FILL_HERE>"
DEFAULT_PORT = 7860

# Directory that utils.get_file_as_string() reads static assets (CSS) from.
STATIC_PATH = "static"

# Default generation parameters.
DEFAULT_SETTINGS = dict(
    temperature=0.9,
    max_new_tokens=256,
    top_p=0.95,
    repetition_penalty=1.0,
    version="StarCoder",
)
static/loading-icon.svg ADDED
static/styles.css ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Web font for headings, matching the theme's IBM Plex family. */
@import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600;700&display=swap');

h1, h2 {
    font-family: 'IBM Plex Mono', sans-serif;
}

/* Hide the "generating" status indicator. */
.generating {
    visibility: hidden
}

.gradio-container {
    color: black
}

/* monospace_css */
/* Render the code-input textbox (#q-input) in a monospace font. */
#q-input textarea {
    font-family: monospace, 'Consolas', Courier, monospace;
}

/* Share Button */

/* it was hidden directly inside the svg xml content */
#share-btn-loading-icon {
    display: none;
}

a {
    text-decoration-line: underline;
    font-weight: 600;
}

/* Continuous spinner animation for the loading icon. */
.animate-spin {
    animation: spin 1s linear infinite;
}

@keyframes spin {
    from {
        transform: rotate(0deg);
    }
    to {
        transform: rotate(360deg);
    }
}

/* Pill-shaped black container for the share button. */
#share-btn-container {
    display: flex;
    padding-left: 0.5rem !important;
    padding-right: 0.5rem !important;
    background-color: #000000;
    justify-content: center;
    align-items: center;
    border-radius: 9999px !important;
    width: 15rem;
}

#share-btn {
    all: initial;
    color: #ffffff;
    font-weight: 600;
    cursor: pointer;
    font-family: 'IBM Plex Sans', sans-serif;
    margin-left: 0.5rem !important;
    padding-top: 0.25rem !important;
    padding-bottom: 0.25rem !important;
}

/* Strip inherited styling from everything inside the share button. */
#share-btn * {
    all: unset;
}

#share-btn-container div:nth-child(-n+2) {
    width: auto !important;
    min-height: 0px !important;
}

/* Hide Gradio's default wrapper chrome inside the share-button container. */
#share-btn-container .wrap {
    display: none !important;
}
utils.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List
3
+
4
+ from settings import STATIC_PATH
5
+
6
+
7
def is_google_colab():
    """Return True when running inside a Google Colab environment.

    Detection works by attempting to import a Colab-only module; the import
    only succeeds on Colab runtimes.
    """
    try:
        from google.colab import drive  # noqa: F401 - import is the check
    except ImportError:
        return False
    return True
15
+
16
+
17
def get_file_as_string(file_name, path=STATIC_PATH) -> str:
    """Read a file and return its entire content as one string.

    Args:
        file_name: Name of the file to load.
        path (str, optional): Directory containing the file. Defaults to
            STATIC_PATH (from settings).

    Returns:
        str: The content of the file as a single string.
    """
    full_path = os.path.join(path, file_name)
    with open(full_path, mode="r", encoding="UTF-8") as handle:
        content = handle.read()
    return content
32
+
33
+
34
def get_sections(string: str, delimiter: str, up_to: int = None) -> List[str]:
    """Split *string* on *delimiter* and return the non-blank pieces.

    Args:
        string (str): The text to split.
        delimiter (str): Separator between sections.
        up_to (int, optional): Maximum number of sections to return.
            Defaults to None (all sections).

    Returns:
        List[str]: Stripped, non-empty sections, truncated to ``up_to``.
    """
    sections = []
    for piece in string.split(delimiter):
        # Skip pieces that are empty or whitespace-only.
        if piece and not piece.isspace():
            sections.append(piece.strip())
    return sections[:up_to]
51
+
52
+
53
def get_workers(safety: int = 4) -> int:
    """Return the number of worker processes to use on this system.

    Computed as the number of CPU cores minus a safety margin, but never
    less than 1.

    Args:
        safety (int, optional): Number of cores to hold back. Defaults to 4.

    Returns:
        int: ``max(1, cores - safety)``.
    """
    # os.cpu_count() can return None when the core count is undetermined;
    # fall back to a single core so the subtraction cannot raise TypeError.
    cores = os.cpu_count() or 1
    return max(1, cores - safety)