Andreas Varvarigos committed on
Commit
b474902
·
verified ·
1 Parent(s): bb820c9

Delete configs

Browse files
configs/alpaca.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "description": "Template used by Alpaca-LoRA.",
3
- "prompt_input": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n",
4
- "prompt_no_input": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:\n",
5
- "response_split": "### Response:"
6
- }
 
 
 
 
 
 
 
configs/cleaning_config.yaml DELETED
@@ -1,203 +0,0 @@
1
- patterns_and_insertions:
2
- [
3
- {
4
- "pattern" : '(?:\\figcomp{\s*)(?P<first>.*?)\s*}\s*{\s*(?P<second>.*?)\s*}\s*{\s*(?P<third>.*?)\s*}',
5
- "insertion" : '\parbox[c]{{ {second} \linewidth}} {{ \includegraphics[width= {third} \linewidth]{{figures/{first} }} }}',
6
- "description" : "Replace figcomp",
7
- },
8
- ]
9
-
10
- verbose: False
11
-
12
- commands_to_delete: [
13
- 'footnote',
14
- 'footnote ',
15
- 'crdata',
16
- 'appendixhead',
17
- 'selectlanguage',
18
- 'name',
19
- 'expandafter',
20
- 'copyrightyear',
21
- 'acmYear',
22
- 'acmBooktitle',
23
- 'acmPrice',
24
- 'authorcontributions',
25
- 'thanksref',
26
- 'funding',
27
- 'conflictsofinterest',
28
- 'externalbibliography',
29
- 'acmDOI',
30
- 'acmISBN',
31
- 'acmConference',
32
- 'titleheader',
33
- 'affil',
34
- 'authorrunning',
35
- 'pagenumbering',
36
- 'enlargethispage',
37
- 'author',
38
- 'AuthorNames',
39
- 'author\n',
40
- 'Author\n',
41
- 'Author',
42
- 'fntext',
43
- 'icmlauthor',
44
- 'icmlauthor\n',
45
- 'icmladdress',
46
- 'icmladdress\n',
47
- 'received',
48
- 'runninghead',
49
- 'bstctlcite',
50
- 'slugger',
51
- 'tocauthor',
52
- 'author\*',
53
- 'vspace\*',
54
- '\write18',
55
- 'hspace\*',
56
- 'vspace',
57
- 'hspace',
58
- 'maketitle',
59
- 'institute',
60
- 'label',
61
- 'urlstyle',
62
- 'acks',
63
- 'tnoteref',
64
- 'Appendix',
65
- 'urlstyle',
66
- 'url',
67
- 'editor',
68
- 'ccsdesc',
69
- 'cortext',
70
- 'bibliography',
71
- 'permission',
72
- 'usetikzlibrary',
73
- 'thanks',
74
- 'thispagestyle',
75
- 'abovedisplayskip',
76
- 'belowdisplayskip',
77
- 'bibliographystyle',
78
- 'IfSubStringInString',
79
- 'hyphenation',
80
- 'theoremstyle',
81
- 'colorbox',
82
- 'textcolor',
83
- 'color',
84
- 'caption',
85
- 'thlabel',
86
- 'fancyfoot',
87
- 'captionof',
88
- 'settopmatter',
89
- 'IEEEtriggeratref',
90
- 'IEEEauthorblockN',
91
- 'IEEEauthorblockA',
92
- 'IEEEauthorblockN\n',
93
- 'IEEEauthorblockA\n',
94
- 'IEEEauthorrefmark',
95
- 'orcid',
96
- 'typeout',
97
- 'fancyhead',
98
- 'pagestyle',
99
- 'biboptions',
100
- 'affiliation',
101
- 'address',
102
- 'institution',
103
- 'printalgoIEEE',
104
- 'date',
105
- 'authornote',
106
- 'numberofauthors',
107
- 'footnotetext',
108
- 'email',
109
- 'reftitle',
110
- 'setcopyright',
111
- 'ead',
112
- 'deleted',
113
- 'includegraphics',
114
- 'comment',
115
- 'abstract',
116
- 'replaced',
117
- 'xspace',
118
- ]
119
-
120
- commands_only_to_delete: [
121
- 'titlerunning',
122
- 'runningtitle',
123
- 'title',
124
- 'title\*',
125
- 'accept',
126
- 'added',
127
- 'icmltitle',
128
- 'textsuperscript',
129
- 'texttt',
130
- 'textsc',
131
- 'textit',
132
- 'mathit',
133
- 'makebox',
134
- 'mbox',
135
- 'textbf',
136
- 'acl',
137
- 'textnormal',
138
- 'texttt ',
139
- 'textsc ',
140
- 'textit ',
141
- 'mathit ',
142
- 'textbf ',
143
- 'gls',
144
- 'Gls',
145
- 'glspl',
146
- 'textnormal ',
147
- 'inlinetitle',
148
- 'mbox',
149
- 'hl',
150
- 'highlight',
151
- 'IEEEraisesectionheading',
152
- 'IEEEtitleabstractindextext',
153
- 'IEEEPARstart',
154
- 'it',
155
- 'added',
156
- 'paragraph',
157
- 'paragraph\*',
158
- 'MakeLowercase',
159
- 'emph',
160
- 'emph ',
161
- 'text',
162
- 'acp',
163
- 'ac'
164
- ]
165
-
166
- environments_to_delete: [
167
- 'icmlauthorlist',
168
- 'tikzpicture',
169
- 'groupplot',
170
- 'biography',
171
- 'IEEEbiographynophoto',
172
- 'acronym',
173
- 'MSCcodes',
174
- 'IEEEbiography',
175
- 'figure',
176
- 'AMS',
177
- 'acknowledgement',
178
- 'acknowledgments',
179
- 'acknowledgements',
180
- 'figure\*',
181
- 'minipage',
182
- 'table',
183
- 'table\*',
184
- 'glist',
185
- 'tabular',
186
- 'tabular\*',
187
- 'center',
188
- 'remark',
189
- 'algorithm',
190
- 'algorithmic',
191
- 'CCSXML',
192
- 'acks',
193
- 'lstlisting',
194
- 'tabu',
195
- 'algorithm\*',
196
- 'algorithmic\*',
197
- 'longtable',
198
- 'sidewaystable\*',
199
- 'sidewaystable',
200
- 'appendices',
201
- 'wrapfigure',
202
- 'appendix'
203
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/config.yaml DELETED
@@ -1,55 +0,0 @@
1
- data_downloading:
2
- download_directory: "quant_bio_retrieval/" # directory where the papers will be downloaded and the graph will be saved
3
- gexf_file: "test_graph.gexf" # name of the graph file that will be created only if downloading option is true
4
- processing:
5
- random_seed: 10
6
- keep_unstructured_content: false # keep unstructured content of the papers as graph node attribute if true
7
- arxiv_rate_limit: 3 # time in seconds to wait between consecutive arXiv API calls to avoid being banned
8
-
9
- retriever:
10
- embedder: BAAI/bge-large-en-v1.5
11
- num_retrievals: 30000
12
- load_arxiv_embeds: True # load arxiv embeddings from huggingface if true else generate them
13
-
14
- inference:
15
- base_model: meta-llama/Meta-Llama-3-8B
16
- pretrained_model: "models/Robotics/Meta-LLama-3-8B-Quantative-Robotics" # used only if training option is false
17
- generation_args:
18
- max_new_tokens: 1000
19
- do_sample: True
20
- top_p: 0.9
21
- top_k: 50
22
- temperature: 0.7
23
- no_repeat_ngram_size: 2
24
- num_beams: 1
25
- gen_related_work_instruct_model: meta-llama/Llama-3.1-8B-Instruct # model that assists in generating related-work instructions
26
-
27
- training:
28
- predefined_graph_path: "robotics.gexf" # path to the graph dataset used for fine-tuning only if downloading option is false
29
- trainer_args:
30
- per_device_train_batch_size: 4
31
- warmup_steps: 100
32
- num_train_epochs: 1
33
- learning_rate: 0.0002
34
- lr_scheduler_type: 'cosine'
35
- fp16: true
36
- logging_steps: 1
37
- save_steps: 50
38
- trainer_output_dir: trainer_outputs/
39
- tokenizer:
40
- max_length: 1024
41
- qlora:
42
- rank: 8
43
- lora_alpha: 32
44
- lora_dropout: 0.05
45
- target_modules: # modules for which to train lora adapters
46
- - q_proj
47
- - k_proj
48
- - v_proj
49
- - o_proj
50
-
51
- # Used only if training option is true to save and load the fine-tuned model
52
- model_saving:
53
- model_name: llama_1b_qlora_uncensored
54
- model_output_dir: models # model saved in {model_output_dir}/{model_name}_{index} after fine-tuning completion
55
- index: 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/config_noUI.yaml DELETED
@@ -1,37 +0,0 @@
1
- # Note: To train a model and then evaluate that same model, make sure that
2
- # base_model is the same in both the training and eval sections
3
-
4
- # Evaluation configuration
5
- eval:
6
- base_model: meta-llama/Llama-3.2-1B
7
- graph_path: datasets/quantum_graph.gexf
8
- model_name: llama_1b_qlora_uncensored
9
-
10
- # Training configuration
11
- training:
12
- graph_path: datasets/quantum_graph.gexf # path to the graph file to train on
13
- base_model: meta-llama/Llama-3.2-1B
14
- trainer_args:
15
- per_device_train_batch_size: 4
16
- warmup_steps: 100
17
- num_train_epochs: 1
18
- learning_rate: 0.0002
19
- lr_scheduler_type: 'cosine'
20
- fp16: true
21
- logging_steps: 1
22
- save_steps: 50
23
- trainer_output_dir: trainer_outputs/
24
- tokenizer:
25
- max_length: 1024
26
- qlora:
27
- rank: 8
28
- lora_alpha: 32
29
- lora_dropout: 0.05
30
- target_modules: # modules for which to train lora adapters
31
- - q_proj
32
- - k_proj
33
- - v_proj
34
- - o_proj
35
- model_saving:
36
- model_output_dir: models # model saved in {model_output_dir}/{model_name} after fine-tuning completion
37
- model_name: llama_1b_qlora_uncensored
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/latex_commands.yaml DELETED
@@ -1,162 +0,0 @@
1
- verbatim_to_delete: [
2
- '\IEEEpeerreviewmaketitle',
3
- '\normalcolor',
4
- '\ifCLASSOPTIONcaptionsoff',
5
- '\pagebreak',
6
- '\makeatletter',
7
- '\makeatother',
8
- '\maketitle',
9
- '\preface',
10
- '\eShell',
11
- '\medskip',
12
- '\tableofcontents',
13
- '\begin{@twocolumnfalse}',
14
- '\end{@twocolumnfalse}',
15
- '\bgroup',
16
- '\egroup',
17
- '\ifnalpaper1',
18
- '\let\thefootnote',
19
- '\begin{spacing}{2.0}',
20
- '\end{landscape}',
21
- '\begin{landscape}',
22
- '\begin{doublespacing}',
23
- '\end{doublespacing}',
24
- '\begin{spacing}',
25
- '\end{spacing}',
26
- '\printbibliography',
27
- '\begin{sloppypar}',
28
- '\end{sloppypar}',
29
- '\ifbd',
30
- '\iftr',
31
- '\fussy',
32
- '\sloppy',
33
- '\emergencystretch',
34
- '\hideLIPIcs',
35
- '\tolerance',
36
- '\hbadness',
37
- '\bShell',
38
- '\glsresetall',
39
- '\copyrightnotice',
40
- '\copyright',
41
- '\centering',
42
- '\immediate',
43
- '\doublespacing',
44
- '\flushbottom',
45
- '\printAffiliationsAndNotice',
46
- '\IEEEpubid',
47
- '\twocolumn',
48
- '\noindent',
49
- '\indent',
50
- '\onecolumn',
51
- '\ignore',
52
- '\selectfont',
53
- '\raggedbottom',
54
- '\IEEEoverridecommandlockouts',
55
- '\newline',
56
- '\tiny',
57
- '\break',
58
- '\mainmatter',
59
- '\let\rc\rangle',
60
- '\let\lc\langle',
61
- '\acresetall',
62
- '\acknowledgments',
63
- '\begin{section}',
64
- '\begin{small}',
65
- '\end{small}',
66
- '\relax',
67
- '\ninept',
68
- '\FloatBarrier',
69
- '\boldmath',
70
- '\end{section}',
71
- '\Huge',
72
- '\fancyhf',
73
- '\fancyhead',
74
- '\begin{frontmatter}',
75
- '\end{frontmatter}',
76
- '\clearpage',
77
- '\huge',
78
- '\newpage',
79
- '\IEEEdisplaynontitleabstractindextext',
80
- '\DontPrintSemicolon',
81
- '\Large',
82
- '\LARGE',
83
- '\ifCLASSOPTIONcompsoc',
84
- '\ifCLASSOPTIONonecolumn',
85
- '\xspace',
86
- '\large',
87
- '\acmcopyr',
88
- '\flushleft',
89
- '\newpage',
90
- '\protect',
91
- '\begingroup',
92
- '\endgroup',
93
- '\bigskip',
94
- '\smallskip',
95
- '\small',
96
- '\left',
97
- '\right',
98
- '\vfill',
99
- '\hfill',
100
- '\begin{appendices}',
101
- '\IEEEQED',
102
- '\leavevmode',
103
- '\footnotesize',
104
- '\nonumber',
105
- '\scriptsize',
106
- '\IEEEpubidadjcol',
107
- '\balance',
108
- '\normalsize',
109
- '\ifloguseIEEEConf',
110
- '\else',
111
- '\fi',
112
- '\bf ',
113
- '\it ',
114
- '\verb ',
115
- '\tt ',
116
- '\em ',
117
- '\par '
118
- ]
119
-
120
- two_arguments: [
121
- 'IEEEPARstart',
122
- 'pgfdeclareplotmark',
123
- 'setcounter',
124
- 'texorpdfstring',
125
- 'fontsize',
126
- 'addtocounter',
127
- 'addtolength'
128
- ]
129
-
130
- three_arguments: [
131
- 'definecolor'
132
- ]
133
-
134
- two_arguments_elaborate: [
135
- 'markboth',
136
- 'setlength',
137
- 'pgfdeclareplotmark',
138
- 'icmlsetsymbol',
139
- 'texorpdfstring',
140
- 'conferenceinfo',
141
- 'acrodef',
142
- 'icmlcorrespondingauthor',
143
- 'pdfbookmark',
144
- 'icmlaffiliation',
145
- 'icmlcorrespondingauthor'
146
- ]
147
-
148
- three_arguments_elaborate: [
149
- 'ifthenelse',
150
- 'addcontentsline'
151
- ]
152
-
153
- replace_comments: [
154
- 'def\\',
155
- 'def ',
156
- 'newglossaryentry',
157
- 'newtheorem',
158
- 'newcommand',
159
- 'renewcommand',
160
- 'newenvironment',
161
- 'renewenvironment'
162
- ]