Andreas99 committed on
Commit
8dd388d
·
verified ·
1 Parent(s): a3bcde9

Delete utils

Browse files
utils/de-macro.py DELETED
@@ -1,1110 +0,0 @@
1
- #!/usr/bin/python -O
2
-
3
- r"""
4
- Copyright 2005-2020 Peter Gacs
5
- Licensed under the Academic Free Licence version 2.1
6
-
7
- DE-MACRO
8
-
9
- Version 1.4.1 - A small typo corrected.
10
-
11
- Version 1.4 - Luca Citi made it python2.7 and python3 compatible.
12
- Peter Gacs improved the parsing of \input{<filename>},
13
- and made @ a letter in the style files.
14
- Version 1.3 - this version is much more conservative about deleting
15
- comments and inserting or deleting blank space: tries to
16
- leave in all comments, adds space only when necessary, and
17
- tries not to delete space in the main text.
18
- The motivating comments came from Daniel Webb.
19
- Version 1.2 - a syntactical bug corrected, thanks Brian de Alwis!
20
-
21
-
22
- PURPOSE
23
-
24
- This program can eliminate most private macros from a LaTeX file.
25
- Applications:
26
- - your publisher has difficulty dealing with many private macros
27
- - you cooperate with colleagues who do not understand your macros
28
- - preprocessing before a system like latex2html, which is somewhat
29
- unpredictable with private macros.
30
-
31
- It cannot be used to eliminate more complex macros that rely on
32
- more programming-like constructs in style files. In particular, it will
33
- not replace style files that have options.
34
-
35
- USAGE
36
-
37
- de-macro [--defs <defs-db>] <tex-file-1>[.tex] [<tex-file-2>[.tex] ...]
38
-
39
- Simplest example: de-macro testament
40
-
41
- (As you see, the <> is used only in the notation of this documentation,
42
- you should not type it.)
43
-
44
- If <tex-file-i> contains a command \usepackage{<defs-file>-private}
45
- then the file <defs-file>-private.sty will be read, and its macros will be
46
- replaced in <tex-file-i> with their definitions.
47
- The result is in <tex-file-i>-clean.tex.
48
-
49
- Only newcommand, renewcommand, newenvironment, and renewenvironment are
50
- understood (it does not matter, whether you write new or renew).
51
- These can be nested but do not be too clever, since I do not
52
- guarantee the same expansion order as in TeX.
53
-
54
- FILES
55
-
56
- <tex-file-1>.db
57
- <tex-file>-clean.tex
58
- <defs-file>-private.sty
59
-
60
- For speed, a macro database file called <defs-file>.db is created.
61
- If such a file exists already then it is used.
62
- If <defs-file>-private.sty is older than <tex-file-1>.db then it will not
63
- be used.
64
-
65
- It is possible to specify another database filename via --defs <defs-db>.
66
- Then <defs-db>.db will be used.
67
-
68
- For each <tex-file-i>, a file <tex-file-i>-clean.tex will be produced.
69
- If <tex-file-i>-clean.tex is newer than <tex-file-i>.tex then it stays.
70
-
71
- INPUT COMMAND
72
-
73
- If a tex file contains a command \input{<tex-file-j>} or \input <tex-file-j>
74
- then <tex-file-j>.tex is processed recursively, and <tex-file-j>-clean.tex
75
- will be inserted into the final output.
76
- For speed, if <tex-file-j>-clean.tex is newer than <tex-file-j>.tex
77
- then <tex-file-j>.tex will not be reprocessed.
78
-
79
- The dependency checking is not sophisticated, so if you rewrite some macros
80
- then remove all *-clean.tex files!
81
-
82
- """
83
-
84
- import sys, os, re, shelve
85
-
86
- # Utilities
87
-
88
- class No_detail:
89
- strerror = ""
90
-
91
- no_detail = No_detail()
92
-
93
-
94
- class Error(Exception):
95
- """Base class for exceptions in this module."""
96
- pass
97
-
98
- class Empty_text_error(Error):
99
- """Exception raised for errors in the input.
100
-
101
- Attributes:
102
- data -- data that was found empty
103
- message
104
- """
105
-
106
- def __init__(self, data, message):
107
- self.data = data
108
- self.message = message
109
-
110
- def warn(error_message, detail = no_detail):
111
- sys.stderr.write(error_message + "\n")
112
- if no_detail != detail:
113
- sys.stderr.write(detail.strerror + "\n")
114
-
115
- def die(error_message, detail = no_detail):
116
- warn(error_message, detail = no_detail)
117
- sys.exit(1)
118
-
119
- def getopt_map(one_letter_opts, long_optlist):
120
- "Turns long options into an option map, using getopt."
121
- import getopt
122
- optlist, args = getopt.getopt(sys.argv[1:],
123
- one_letter_opts, long_optlist)
124
- opt_map = {}
125
- for pair in optlist: opt_map[pair[0]] = pair[1] or 1
126
- return opt_map, args
127
-
128
- def newer(file1, file2):
129
-
130
- if not os.path.isfile(file1):
131
- return False
132
-
133
- try:
134
- stat_return = os.lstat(file1)
135
- except OSError as detail:
136
- die("lstat " + file1 + " failed:", detail)
137
- time1 = stat_return.st_mtime
138
-
139
- try:
140
- stat_return = os.lstat(file2)
141
- except OSError as detail:
142
- die("lstat " + file2 + " failed:", detail)
143
- time2 = stat_return.st_mtime
144
-
145
- return time1 > time2
146
-
147
- def cut_extension(filename, ext):
148
- """
149
- If filename has extension ext (including the possible dot),
150
- it will be cut off.
151
- """
152
- file = filename
153
- index = filename.rfind(ext)
154
- if 0 <= index and len(file)-len(ext) == index:
155
- file = file[:index]
156
- return file
157
-
158
-
159
- class Stream:
160
- data = None
161
- pos = None
162
- item = None
163
-
164
- def legal(self):
165
- return 0 <= self.pos and self.pos < len(self.data)
166
-
167
- def uplegal(self):
168
- return self.pos < len(self.data)
169
-
170
- def __init__(self, data_v = None):
171
- self.data = data_v
172
- if self.data:
173
- self.pos = 0
174
- self.item = self.data[self.pos]
175
-
176
- def next(self):
177
- self.pos += 1
178
- if self.pos < len(self.data):
179
- self.item = self.data[self.pos]
180
- return self.item
181
-
182
- def reset(self):
183
- if self.data and 0 < len(self.data):
184
- self.pos = 0
185
- self.item = self.data[0]
186
- return self.item
187
-
188
-
189
- # Basic classes
190
-
191
- blank_re = re.compile(r"\s")
192
- blanked_filename_re = re.compile(r"^\s+(\w*)\s+")
193
- braced_filename_re = re.compile(r"^\s*{\s*(\w*)\s*}")
194
- blank_or_rbrace_re = re.compile(r"[\s}]")
195
- pos_digit_re = re.compile(r"[1-9]")
196
-
197
- def isletter(c, isatletter=False):
198
- if "@" == c:
199
- return isatletter
200
- else:
201
- return c.isalpha()
202
-
203
- class Token:
204
- """Type 0 means ordinary character, types 1,2 mean escape sequence
205
- (without the \ ), type 3 means comment.
206
- """
207
- simple_ty = 0
208
- esc_symb_ty = 1
209
- esc_str_ty = 2
210
- comment_ty = 3
211
-
212
- type = simple_ty
213
- val = " "
214
-
215
- def __init__(self, type_v=simple_ty, val_v=" "):
216
- self.type = type_v
217
- self.val = val_v
218
-
219
- def show(self):
220
- out = ""
221
- if simple_ty == self.type or comment_ty == self.type:
222
- out = self.val
223
- else:
224
- out = "\\" + self.val
225
- return out
226
-
227
-
228
- # Constants
229
-
230
- g_token = Token(0," ") # generic token
231
- simple_ty = g_token.simple_ty
232
- comment_ty = g_token.comment_ty
233
- esc_symb_ty = g_token.esc_symb_ty
234
- esc_str_ty = g_token.esc_str_ty
235
-
236
-
237
- def detokenize(text, isatletter=False):
238
- """
239
- Input is a list of tokens.
240
- Output is a string.
241
- """
242
- out = ""
243
- if 0 == len(text):
244
- return
245
- pos = 0
246
- out += text[pos].show()
247
- pos += 1
248
- while pos < len(text):
249
- previtem = text[pos-1]
250
- item = text[pos]
251
- """Insert a separating space after an escape sequence if it is a
252
- string and is followed by a letter."""
253
- if (esc_str_ty == previtem.type
254
- and simple_ty == item.type and isletter(item.val[0], isatletter)):
255
- out += " "
256
- out += item.show()
257
- pos += 1
258
- return out
259
-
260
-
261
- def strip_comments(text):
262
- """
263
- Input is a list of tokens.
264
- Output is the same list except the comment tokens.
265
- """
266
- out = []
267
- for token in text:
268
- if not comment_ty == token.type:
269
- out.append(token)
270
- return out
271
-
272
- class Group:
273
- """type 0 means a token, type 1 means contents of a group within {}
274
- """
275
- token_ty = 0
276
- group_ty = 1
277
- type = token_ty
278
- val = [] # Value is a token list.
279
-
280
- def __init__(self, type_v, val_v):
281
- self.type = type_v
282
- self.val = val_v
283
-
284
- def show(self):
285
- if token_ty == self.type:
286
- return self.val.show()
287
- else:
288
- return "{%s}" % detokenize(self.val)
289
-
290
- # Constants
291
-
292
- g_group = Group(0, [])
293
- token_ty = g_group.token_ty
294
- group_ty = g_group.group_ty
295
-
296
-
297
- def tokenize(in_str, isatletter=False):
298
- """Returns a list of tokens.
299
- """
300
- text = []
301
- cs = Char_stream(in_str)
302
- cs.reset()
303
- if not cs.legal():
304
- raise Error("No string to tokenize.")
305
- while cs.uplegal():
306
- if "%" == cs.item:
307
- comment = cs.scan_comment_token()
308
- text.append(Token(comment_ty, comment))
309
- elif "\\" != cs.item:
310
- text.append(Token(simple_ty, cs.item))
311
- cs.next()
312
- else:
313
- cs.next()
314
- name = cs.scan_escape_token(isatletter)
315
- if isletter(name[0], isatletter):
316
- token = Token(esc_str_ty, name)
317
- else:
318
- token = Token(esc_symb_ty, name)
319
- text.append(token)
320
- if "makeatletter" == name:
321
- isatletter=True
322
- elif "makeatother" == name:
323
- isatletter=False
324
- return text
325
-
326
-
327
- class Command_def:
328
- name = "1"
329
- numargs = 0
330
- body= ""
331
-
332
- def __init__(self, name_v, numargs_v, body_v):
333
- self.name = name_v
334
- self.numargs = numargs_v
335
- self.body = body_v
336
-
337
- def show(self):
338
- out = "\\newcommand{\\%s}" % (self.name)
339
- if 0 < self.numargs:
340
- out += "[%d]" % self.numargs
341
- out += "{%s}" % detokenize(self.body)
342
- return out
343
-
344
-
345
- class Env_def:
346
- name = "1"
347
- numargs = 0
348
- begin = ""
349
- end = ""
350
-
351
- def __init__(self, name_v, numargs_v, begin_v, end_v):
352
- self.name = name_v
353
- self.numargs = numargs_v
354
- self.begin = begin_v
355
- self.end = end_v
356
-
357
- def show(self):
358
- out = "\\newenvironment{%s}" % self.name
359
- if 0 < self.numargs:
360
- out += "[%d]" % self.numargs
361
- out += "{%s}" % detokenize(self.begin)
362
- out += "{%s}" % detokenize(self.end)
363
- return out
364
-
365
-
366
- class Command_instance:
367
- name = "1"
368
- args = []
369
-
370
- def __init__(self, name_v, args_v):
371
- self.name = name_v
372
- self.args = args_v
373
-
374
- def show(self):
375
- out = "\\"+self.name
376
- for arg in self.args:
377
- out += "{%s}" % detokenize(arg)
378
- return out
379
-
380
-
381
- class Env_instance:
382
- name = "1"
383
- args = []
384
-
385
- def __init__(self, name_v, args_v, body_v):
386
- self.name = name_v
387
- self.args = args_v
388
- self.body = body_v
389
-
390
- def show(self):
391
- out = "\\begin{%s}" % self.name
392
- for arg in self.args:
393
- out += "{%s}" % detokenize(arg)
394
- out += detokenize(self.body)
395
- out += "\\end{%s}" % self.name
396
- return out
397
-
398
- class Char_stream(Stream):
399
-
400
- def scan_escape_token(self, isatletter=False):
401
- """
402
- Starts after the escape sign, assumes that it is scanning a symbol.
403
- Returns a token-string.
404
- """
405
- out = self.item # Continue only if this is a letter.
406
- item = self.next()
407
- if isletter(out, isatletter):
408
- while self.uplegal() and isletter(item, isatletter):
409
- out += item
410
- item = self.next()
411
- return out
412
-
413
- def scan_comment_token(self):
414
- """
415
- Starts at the comment sign %, assumes that it is scanning a comment.
416
- Returns the whole comment string,
417
- including the % and all empty space after it.
418
- """
419
- comment = ""
420
- while self.uplegal() and "\n" != self.item:
421
- comment += self.item
422
- self.next()
423
- while self.uplegal() and blank_re.match(self.item):
424
- comment += self.item
425
- self.next()
426
- return comment
427
-
428
- def scan_input_filename(self):
429
- """We have just read an \input token. The next group or word will be
430
- interpreted as a filename (possibly without .tex). Filenames should not begin with spaces.
431
- Return the filename.
432
- """
433
- item = self.item
434
- file = ""
435
- while self.uplegal() and blank_re.match(self.item):
436
- item = self.next()
437
- if "{" == item:
438
- item = self.next()
439
- while self.uplegal() and not "}" == item:
440
- file += item
441
- item = self.next()
442
- self.next()
443
- else:
444
- while self.uplegal() and not blank_re.match(item):
445
- file += item
446
- item = self.next()
447
- return file
448
-
449
- def scan_package_filenames(self):
450
- r"""We just read a \usepackage token. The next group will be
451
- interpreted as a list of filenames (without .sty) separated by commas.
452
- Return the list.
453
- """
454
- item = self.item
455
- while self.uplegal() and blank_re.match(item):
456
- item = self.next()
457
- file = ""
458
- if not "{" == item:
459
- raise Error("\\usepackage not followed by brace.")
460
- item = self.next()
461
- while self.uplegal() and not blank_or_rbrace_re.match(item):
462
- file += item
463
- item = self.next()
464
- self.next()
465
- return file.split(",")
466
-
467
-
468
- class Tex_stream(Stream):
469
-
470
- defs = ({}, {})
471
- defs_db = "x"
472
- defs_db_file = "x.db"
473
- debug = False
474
-
475
- def smart_tokenize(self, in_str, handle_inputs=False, isatletter=False):
476
- """Returns a list of tokens.
477
- It may interpret and carry out all \input commands.
478
- """
479
- self.data = []
480
- text = self.data
481
- cs = Char_stream(in_str)
482
- cs.reset()
483
- if not cs.legal():
484
- raise Error("No string to tokenize.")
485
- while cs.uplegal():
486
- if "%" == cs.item:
487
- comment = cs.scan_comment_token()
488
- text.append(Token(comment_ty, comment))
489
- elif "\\" != cs.item:
490
- text.append(Token(simple_ty, cs.item))
491
- cs.next()
492
- else:
493
- cs.next()
494
- name = cs.scan_escape_token(isatletter)
495
- if "input" == name and handle_inputs:
496
- file = cs.scan_input_filename()
497
- to_add = self.process_if_newer(file)
498
- text.extend(to_add)
499
- elif "usepackage" == name:
500
- while cs.uplegal() and blank_re.match(cs.item):
501
- cs.next()
502
- if "[" == cs.item: # Packages with options will not be processed.
503
- text.extend([Token(esc_str_ty, "usepackage"),
504
- Token(simple_ty, "[")])
505
- cs.next()
506
- continue
507
- files = cs.scan_package_filenames()
508
- i = 0
509
- while i < len(files): # process private packages
510
- file = files[i]
511
- p = file.rfind("-private")
512
- if p < 0 or not len(file) - len("-private") == p:
513
- i += 1
514
- continue
515
- defs_db_file = file+".db"
516
- self.add_defs(file)
517
- del files[i:(i+1)]
518
- if files: # non-private packages left
519
- group_content = ",".join(files)
520
- to_add_str = "\\usepackage{%s}" % (group_content)
521
- to_add = tokenize(to_add_str,isatletter)
522
- text.extend(to_add)
523
- else:
524
- if isletter(name[0], isatletter):
525
- token = Token(esc_str_ty, name)
526
- else:
527
- token = Token(esc_symb_ty, name)
528
- text.append(token)
529
- if "makeatletter" == name:
530
- isatletter=True
531
- elif "makeatother" == name:
532
- isatletter=False
533
- self.reset()
534
- return self.data
535
-
536
- def smart_detokenize(self,isatletter=False):
537
- """
538
- Output is a string.
539
- If the list contains an \input{file} then the content of file
540
- file-clean.tex replaces it in the output.
541
- """
542
- self.reset()
543
- if not self.legal():
544
- return ""
545
- out = ""
546
- previtem = None
547
- while self.uplegal():
548
- item = self.item
549
- """Insert a separating space after an escape sequence if it is a
550
- string and is followed by a letter."""
551
- if (None != previtem and esc_str_ty == previtem.type
552
- and simple_ty == item.type and isletter(item.val[0], isatletter)):
553
- out += " "
554
- previtem = item
555
- if not (esc_str_ty == item.type and "input" == item.val):
556
- out += item.show()
557
- self.next()
558
- else:
559
- self.next()
560
- group = self.scan_group()
561
- file = detokenize(group.val)
562
- clean_file = "%s-clean.tex" % (file)
563
- print("Reading file %s" % (clean_file))
564
- fp = open(clean_file,"r")
565
- content = fp.read()
566
- fp.close()
567
- out += content
568
- return out
569
-
570
- # Basic tex scanning
571
-
572
- def skip_blank_tokens(self): # we also skip comment tokens.
573
- item = self.item
574
- while (self.uplegal() and
575
- (comment_ty == item.type or
576
- (simple_ty == item.type and blank_re.match(item.val)))):
577
- item = self.next()
578
- return item
579
-
580
- def scan_group(self):
581
- """Returns group.
582
- """
583
- if not self.legal():
584
- raise Error("No group to scan.")
585
- item = self.item
586
- if not (simple_ty == item.type and "{" == item.val):
587
- return Group(token_ty, [self.item])
588
- count = 1
589
- group = []
590
- item = self.next()
591
- while count and self.uplegal():
592
- if simple_ty == item.type:
593
- if "{" == item.val:
594
- count += 1
595
- elif "}" == item.val:
596
- count -= 1
597
- if count != 0:
598
- group.append(item)
599
- item = self.next()
600
- return Group(group_ty, group)
601
-
602
- # Command and environment definitions
603
-
604
- def scan_command_name(self):
605
- """Returns name.
606
- """
607
- if not self.legal():
608
- raise Error("No command name to scan.")
609
- item = self.item
610
- name = ""
611
- if item.type in [esc_symb_ty, esc_str_ty]:
612
- name = item.val
613
- else:
614
- if not "{" == item.val:
615
- raise Error("Command definition misses first {.")
616
- self.next()
617
- item = self.skip_blank_tokens()
618
- if not item.type in [esc_symb_ty, esc_str_ty]:
619
- raise Error("Command definition does not begin with control sequence.")
620
- name = item.val
621
- self.next()
622
- item = self.skip_blank_tokens()
623
- if not "}" == item.val:
624
- raise Error("Definition for commmand %s misses first }., %s" %
625
- (name, item.val))
626
- self.next()
627
- self.skip_blank_tokens()
628
- return name
629
-
630
- def scan_numargs(self, name):
631
- """
632
- name is the name of the command or environment definition being
633
- scanned.
634
- Starts on a nonblank token.
635
- Returns numargs
636
- where numargs is the number of arguments in a command or environment
637
- definition,
638
- """
639
- if not self.legal():
640
- raise Error("No numargs to scan.")
641
- item = self.item
642
- numargs = 0
643
- if not simple_ty == item.type:
644
- raise Error("Illegal command or environment definition: "+name)
645
- if "[" == item.val:
646
- if not 4 < len(self.data):
647
- raise Error("Command or environment definition is illegal: "+name)
648
- item = self.next()
649
- if not simple_ty == item.type:
650
- raise Error("Illegal command or environment definition: "+name)
651
- numargs = item.val
652
- if not pos_digit_re.match(numargs):
653
- raise Error("%s must be argument number after %s" % (numargs, name))
654
- numargs = int(numargs)
655
- self.next()
656
- item = self.skip_blank_tokens()
657
- if not simple_ty == item.type:
658
- raise Error("Illegal command definition: "+name)
659
- if "]" != item.val:
660
- raise Error("Illegal command definition: "+name)
661
- self.next()
662
- self.skip_blank_tokens()
663
- return numargs
664
-
665
- def scan_command_def(self):
666
- """Scan a command definition.
667
- Return command_def.
668
- Assumes that the number of arguments is at most 9.
669
- """
670
- if not self.legal():
671
- raise Error("No command definition to scan.")
672
- item = self.item
673
- if not 2 < len(self.data):
674
- raise Error("Command definition is illegal.")
675
- # newcommand or renewcommand
676
- if not item.type in [esc_symb_ty, esc_str_ty]:
677
- raise Error("Command definition should begin with control sequence: "+item.val)
678
- if item.val not in ["newcommand", "renewcommand"]:
679
- raise Error("Command definition should begin with control sequence.")
680
- self.next()
681
- self.skip_blank_tokens()
682
-
683
- cmd_name = self.scan_command_name()
684
- numargs = self.scan_numargs(cmd_name)
685
-
686
- body_group = self.scan_group()
687
- if group_ty != body_group.type:
688
- raise Error("Command body missing: "+cmd_name)
689
- body_val = strip_comments(body_group.val)
690
- return Command_def(cmd_name, numargs, body_val)
691
-
692
- def scan_env_name(self):
693
- """Starts on a {.
694
- Returns name.
695
- """
696
- if not self.legal():
697
- raise Error("No environment name to scan.")
698
- item = self.item
699
- if not "{" == item.val:
700
- raise Error("Env. definition begins with %s, not with {" % (item.val))
701
- self.next()
702
- item = self.skip_blank_tokens()
703
- name = ""
704
- if not simple_ty == item.type:
705
- raise Error("1. Env. def. begins with cont. seq. %s, not with env.name."
706
- % (item.val))
707
- while self.uplegal() and not blank_or_rbrace_re.match(item.val):
708
- name += item.val
709
- item = self.next()
710
- if not simple_ty == item.type:
711
- raise Error("2. Env. def. begins with cont. seq. %s, not with env.name."
712
- % (item.val))
713
- item = self.skip_blank_tokens()
714
- if not "}" == item.val:
715
- raise Error("Command definition does not begin with control sequence.")
716
- self.next()
717
- self.skip_blank_tokens()
718
- return name
719
-
720
- def scan_env_def(self):
721
- """Scan an environment definition.
722
- Return env_def
723
- Assumes that the number of arguments is at most 9.
724
- """
725
- if not self.legal():
726
- raise Error("No environment definition to scan.")
727
- item = self.item
728
- if not 7 < len(self.data):
729
- raise Error("Environment definition is illegal.")
730
- pos = 0
731
-
732
- if not item.type in [esc_symb_ty, esc_str_ty]:
733
- raise Error("Env. definition does not begin with control sequence:"+
734
- item.val)
735
- if item.val not in ["newenvironment", "renewenvironment"]:
736
- raise Error("Env. definition does not begin with control sequence.")
737
- self.next()
738
- self.skip_blank_tokens()
739
-
740
- env_name = self.scan_env_name()
741
- numargs = self.scan_numargs(env_name)
742
- self.skip_blank_tokens()
743
-
744
- begin_group = self.scan_group()
745
- if group_ty != begin_group.type:
746
- raise Error("Begin body missing: "+env_name)
747
- begin_val = strip_comments(begin_group.val)
748
-
749
- self.skip_blank_tokens()
750
-
751
- end_group = self.scan_group()
752
- if group_ty != end_group.type:
753
- raise Error("End body missing:"+env_name)
754
- end_val = strip_comments(end_group.val)
755
-
756
- return Env_def(env_name, numargs, begin_val, end_val)
757
-
758
- def scan_defs(self):
759
- if not self.legal():
760
- raise Error("No definitions to scan.")
761
- self.reset()
762
- command_defs, env_defs = self.defs
763
- while self.uplegal():
764
- if (esc_str_ty == self.item.type
765
- and self.item.val in ["newcommand", "renewcommand"]):
766
- def_start_pos = self.pos
767
- command_def = self.scan_command_def()
768
- command_defs[command_def.name] = command_def
769
- def_end_pos = self.pos
770
- for del_pos in range(def_start_pos,def_end_pos):
771
- del self.data[def_start_pos]
772
- self.pos = def_start_pos
773
- self.item = self.data[self.pos]
774
- elif (esc_str_ty == self.item.type and self.item.val
775
- in ["newenvironment", "renewenvironment"]):
776
- def_start_pos = self.pos
777
- env_def = self.scan_env_def()
778
- env_defs[env_def.name] = env_def
779
- def_end_pos = self.pos
780
- for del_pos in range(def_start_pos,def_end_pos):
781
- del self.data[def_start_pos]
782
- self.pos = def_start_pos
783
- self.item = self.data[self.pos]
784
- else:
785
- self.next()
786
-
787
- # Instances
788
-
789
- def scan_args(self, command_or_env_def):
790
- """Scan the arguments of a command or environment.
791
- Return [args].
792
- """
793
- if not self.legal():
794
- raise Error("No arguments to scan.")
795
- numargs = command_or_env_def.numargs
796
- name = command_or_env_def.name
797
-
798
- args = []
799
- for i in range(numargs):
800
- arg = []
801
- if not (simple_ty == self.item.type and "{" == self.item.val):
802
- arg = [self.item]
803
- self.next()
804
- else:
805
- group = self.scan_group()
806
- arg = group.val
807
- args.append(arg)
808
- return args
809
-
810
- def scan_command(self, command_def):
811
- """Scan the arguments of a command.
812
- Return command_instance
813
- """
814
- if not self.legal():
815
- raise Error("No command to scan.")
816
- if not self.item.type in [esc_symb_ty, esc_str_ty]:
817
- raise Error("Command does not begin with control sequence.")
818
- name = self.item.val
819
- self.next()
820
- if 0 < command_def.numargs:
821
- self.skip_blank_tokens()
822
- args = self.scan_args(command_def)
823
- else:
824
- args = []
825
- return Command_instance(name, args)
826
-
827
- def test_env_boundary(self, item):
828
- """Check whether an environment begin or end follows.
829
- Return 1 if \begin, -1 if \end, 0 otherwise.
830
- """
831
- d = 0
832
- if esc_str_ty == item.type:
833
- if "begin"==item.val:
834
- d = 1
835
- elif "end"==item.val:
836
- d = -1
837
- return d
838
-
839
- def scan_env_begin(self):
840
- """Scan an environment name.
841
- Return env_name.
842
- """
843
- if not self.legal():
844
- raise Error("No environment begin to scan.")
845
- item = self.item
846
- if not (esc_str_ty == item.type and "begin" == item.val):
847
- raise Error("Environment does not begin with begin.")
848
- self.next()
849
- name_group = self.scan_group()
850
- name = detokenize(name_group.val)
851
- return name
852
-
853
- def scan_env_end(self):
854
- """Scan an environment end.
855
- Return env_name.
856
- """
857
- if not self.legal():
858
- raise Error("No environment end to scan.")
859
- item = self.item
860
- if not (esc_str_ty == item.type and "end" == item.val):
861
- raise Error("Environment does not end with end.")
862
- self.next()
863
- name_group = self.scan_group()
864
- name = detokenize(name_group.val)
865
- return name
866
-
867
- def scan_env_rest(self, env_def):
868
- """Scanning starts after \begin{envname}.
869
- Returns env_instance.
870
- """
871
- if not self.legal():
872
- raise Error("No environment rest to scan.")
873
- count = 1 # We are already within a boundary.
874
- args = self.scan_args(env_def)
875
- body = []
876
- while count and self.uplegal():
877
- old_pos = self.pos
878
- d = self.test_env_boundary(self.item)
879
- count += d
880
- if 1 == d:
881
- self.scan_env_begin()
882
- elif -1 == d:
883
- self.scan_env_end()
884
- else:
885
- self.next()
886
- if 0 < count:
887
- body.extend(self.data[old_pos : self.pos])
888
- return Env_instance(env_def.name, args, body)
889
-
890
- # Definitions
891
-
892
- def restore_defs(self):
893
- if os.path.isfile(self.defs_db_file):
894
- print("Using defs db %s" % (self.defs_db_file))
895
- db_h = shelve.open(self.defs_db)
896
- self.defs = db_h["defs"]
897
- db_h.close()
898
-
899
- def save_defs(self):
900
- db_h = shelve.open(self.defs_db)
901
- if "defs" in db_h:
902
- del db_h["defs"]
903
- db_h["defs"] = self.defs
904
- db_h.close()
905
-
906
- def add_defs(self, defs_file):
907
- defs_file_compl = defs_file + ".sty"
908
- if not os.path.isfile(defs_file_compl):
909
- raise Error("%s does not exist" % (defs_file_compl))
910
-
911
- defs_db_file = self.defs_db_file
912
- if newer(defs_db_file, defs_file_compl):
913
- print("Using defs db %s for %s" % (defs_db_file, defs_file))
914
- else:
915
- defs_fp = open(defs_file_compl, "r")
916
- defs_str = defs_fp.read()
917
- defs_fp.close()
918
- ds = Tex_stream()
919
- ds.defs = self.defs
920
- defs_text = ds.smart_tokenize(defs_str,isatletter=True)
921
- # changing ds.defs will change self.defs
922
- if self.debug:
923
- defs_seen_file = "%s-seen.sty" % (defs_file)
924
- defs_seen_fp = open(defs_seen_file, "w")
925
- out = detokenize(defs_text,isatletter=True)
926
- defs_seen_fp.write(out)
927
- defs_seen_fp.close()
928
- ds.scan_defs()
929
- if self.debug:
930
- out = ""
931
- command_defs, env_defs = self.defs
932
- for def_name in command_defs.keys():
933
- out += command_defs[def_name].show() + "\n"
934
- for def_name in env_defs.keys():
935
- out += env_defs[def_name].show() +"\n"
936
- print("Definitions after reading %s:" % (defs_file))
937
- print(out)
938
-
939
- # Applying definitions, recursively
940
- # (maybe not quite in Knuth order, so avoid tricks!)
941
-
942
- def subst_args(self, body, args):
943
- out = []
944
- pos = 0
945
- while pos < len(body):
946
- item = body[pos]
947
- if not (simple_ty == item.type and "#" == item.val):
948
- out.append(item)
949
- pos += 1
950
- continue
951
- pos += 1
952
- token = body[pos]
953
- argnum = token.val
954
- if not pos_digit_re.match(argnum):
955
- raise Error("# is not followed by number.")
956
- argnum = int(argnum)
957
- if argnum > len(args):
958
- raise Error("Too large argument number.")
959
- arg = args[argnum-1]
960
- out += arg
961
- pos += 1
962
- return out
963
-
964
- def apply_command_recur(self, command_instance):
965
- command_defs, env_defs = self.defs
966
- name = command_instance.name
967
- command_def = command_defs[name]
968
-
969
- args = command_instance.args
970
- body = command_def.body
971
- result = self.subst_args(body, args)
972
- try:
973
- result = self.apply_all_recur(result)
974
- except Empty_text_error as e:
975
- raise Error("apply_all_recur fails on command instance %s: %s, %s" % \
976
- (command_instance.show(), detokenize(e.data), e.message))
977
- return result
978
-
979
- def apply_env_recur(self, env_instance):
980
- command_defs, env_defs = self.defs
981
- name = env_instance.name
982
- env_def = env_defs[name]
983
-
984
- begin, end = env_def.begin, env_def.end
985
- body, args = env_instance.body, env_instance.args
986
- out = self.subst_args(begin, args) + body + self.subst_args(end, args)
987
- return self.apply_all_recur(out)
988
-
989
-
990
- def apply_all_recur(self, data, report=False):
991
- ts = Tex_stream(data)
992
- ts.defs = self.defs
993
- command_defs, env_defs = self.defs
994
- out = []
995
- progress_step = 10000
996
- progress = progress_step
997
- if not ts.legal():
998
- raise Empty_text_error(data, "No text to process.")
999
- while ts.uplegal():
1000
- if self.pos > progress:
1001
- if report:
1002
- print(self.pos)
1003
- progress += progress_step
1004
- if not ts.item.type in [esc_symb_ty, esc_str_ty]:
1005
- out.append(ts.item)
1006
- ts.next()
1007
- continue
1008
- if 1 == ts.test_env_boundary(ts.item):
1009
- old_pos = ts.pos
1010
- env_name = ts.scan_env_begin()
1011
- if env_name not in env_defs:
1012
- out.extend(ts.data[old_pos : ts.pos])
1013
- continue
1014
- else:
1015
- env_def = env_defs[env_name]
1016
- env_instance = ts.scan_env_rest(env_def)
1017
- result = ts.apply_env_recur(env_instance)
1018
- out.extend(result)
1019
- elif ts.item.val not in command_defs:
1020
- out.append(ts.item)
1021
- ts.next()
1022
- continue
1023
- else:
1024
- command_def = command_defs[ts.item.val]
1025
- command_inst = ts.scan_command(command_def)
1026
- result = ts.apply_command_recur(command_inst)
1027
- out.extend(result)
1028
- return out
1029
-
1030
-
1031
- # Processing files
1032
-
1033
- def process_file(self, file):
1034
- """Returns the new defs.
1035
- """
1036
- file = cut_extension(file, ".tex")
1037
- source_file = "%s.tex" % (file)
1038
- print("File %s [" % (source_file))
1039
- source_fp = open(source_file, "r")
1040
- text_str = source_fp.read()
1041
- source_fp.close()
1042
-
1043
- self.smart_tokenize(text_str, handle_inputs=True)
1044
- if not self.data:
1045
- raise Error("Empty tokenization result.")
1046
- self.reset()
1047
-
1048
- if self.debug:
1049
- source_seen_fname = "%s-seen.tex" % (file)
1050
- source_seen_fp = open(source_seen_fname, "w")
1051
- source_seen_fp.write(detokenize(self.data))
1052
- source_seen_fp.close()
1053
- self.scan_defs()
1054
- self.data = self.apply_all_recur(self.data, report=True)
1055
- result_fname = "%s-clean.tex" % (file)
1056
- print("Writing %s [" % (result_fname))
1057
- result_fp = open(result_fname, "w")
1058
- result_fp.write(self.smart_detokenize())
1059
- result_fp.close()
1060
- print("] file %s" % (result_fname))
1061
- print("] file %s" % (source_file))
1062
-
1063
- def process_if_newer(self, file):
1064
- """
1065
- \input{file} is added to the token list.
1066
- If the input file is newer it is processed.
1067
- Returns tokenized \input{file}.
1068
- """
1069
- file = cut_extension(file, ".tex")
1070
- tex_file = file+".tex"
1071
- clean_tex_file = file+"-clean.tex"
1072
- if newer(clean_tex_file, tex_file):
1073
- print("Using %s." % (clean_tex_file))
1074
- else:
1075
- ts = Tex_stream()
1076
- ts.data = []
1077
- ts.defs = self.defs
1078
- ts.process_file(file)
1079
- to_add = "\\input{%s}" % (file)
1080
- return tokenize(to_add)
1081
-
1082
- # Main
1083
-
1084
- long_optlist = ["debug","defs="]
1085
- options, restargs = getopt_map("x", long_optlist)
1086
-
1087
- debug = False
1088
- if "--debug" in options:
1089
- debug = True
1090
-
1091
- root = restargs[0]
1092
- root = cut_extension(root, ".tex")
1093
- if "--defs" in options:
1094
- defs_root = options["--defs"]
1095
- else:
1096
- defs_root = "%s" % (root)
1097
- defs_db = defs_root
1098
- defs_db_file = defs_root+".db"
1099
-
1100
- ts = Tex_stream()
1101
- ts.defs_db = defs_db
1102
- ts.defs_db_file = defs_db_file
1103
- ts.debug = debug
1104
-
1105
- ts.restore_defs()
1106
- for root in restargs:
1107
- ts.process_file(root)
1108
-
1109
- print("(Re)creating defs db %s" % (defs_db))
1110
- ts.save_defs()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/def_handle.py DELETED
@@ -1,75 +0,0 @@
1
- import argparse
2
- import re
3
-
4
-
5
- def main():
6
- args = parse_command_line()
7
- data = read(args.input)
8
- data = convert(data)
9
- write(args.output, data)
10
-
11
-
12
- def parse_command_line():
13
- parser = argparse.ArgumentParser(
14
- description='Replace \\def with \\newcommand where possible.',
15
- )
16
- parser.add_argument(
17
- 'input',
18
- help='TeX input file with \\def',
19
- )
20
- parser.add_argument(
21
- '--output',
22
- '-o',
23
- required=True,
24
- help='TeX output file with \\newcommand',
25
- )
26
-
27
- return parser.parse_args()
28
-
29
- def read(path):
30
- with open(path, mode='rb') as handle:
31
- return handle.read()
32
-
33
-
34
- def convert(data):
35
- return re.sub(
36
- rb'((?:\\(?:expandafter|global|long|outer|protected)'
37
- rb'(?: +|\r?\n *)?)*)?'
38
- rb'\\def *(\\[a-zA-Z]+) *(?:#+([0-9]))*\{',
39
- replace,
40
- data,
41
- )
42
-
43
-
44
- def replace(match):
45
- prefix = match.group(1)
46
- if (
47
- prefix is not None and
48
- (
49
- b'expandafter' in prefix or
50
- b'global' in prefix or
51
- b'outer' in prefix or
52
- b'protected' in prefix
53
- )
54
- ):
55
- pass #return match.group(0)
56
-
57
- result = rb'\newcommand'
58
-
59
- result += b'{' + match.group(2) + b'}'
60
- if match.lastindex == 3:
61
- result += b'[' + match.group(3) + b']'
62
-
63
- result += b'{'
64
- return result
65
-
66
-
67
- def write(path, data):
68
- with open(path, mode='wb') as handle:
69
- handle.write(data)
70
-
71
- print('=> File written: {0}'.format(path))
72
-
73
-
74
- if __name__ == '__main__':
75
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/gradio_utils.py DELETED
@@ -1,20 +0,0 @@
1
- from transformers import StoppingCriteria
2
- import sys
3
-
4
-
5
- # Handle termination signal
6
- def signal_handler(sig, frame):
7
- print("\nTermination signal received. Shutting down Gradio interface.")
8
- sys.exit(0)
9
-
10
- # Custom stopping criteria
11
- class StopOnTokens(StoppingCriteria):
12
- def __call__(self, input_ids, scores, **kwargs):
13
- stop_ids = [29, 0] # Define specific stop token IDs
14
- return input_ids[0][-1] in stop_ids
15
-
16
- # Toggle task selection
17
- def toggle_selection(current_task, new_task):
18
- """Toggle task selection: deselect if clicked again, otherwise update selection."""
19
- updated_task = "" if current_task == new_task else new_task
20
- return updated_task
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/graph_utils.py DELETED
@@ -1,111 +0,0 @@
1
- import regex
2
- import re
3
-
4
- def retrieve_text_cite(text, command):
5
- base_pattern = (
6
- r'\\' + command + r"(?:\[(?:.*?)\])*\{((?:[^{}]+|\{(?1)\})*)\}(?:\[(?:.*?)\])*"
7
- )
8
-
9
- def extract_text_inside_curly_braces(text):
10
- pattern = r"\{((?:[^{}]|(?R))*)\}"
11
-
12
- match = regex.search(pattern, text)
13
-
14
- if match:
15
- return match.group(1)
16
- else:
17
- return ""
18
-
19
- found_texts = []
20
- for match in regex.finditer(base_pattern, text):
21
- temp_substring = text[match.span()[0] : match.span()[1]]
22
- found_texts.append(extract_text_inside_curly_braces(temp_substring))
23
-
24
- return found_texts
25
-
26
- def get_citing_sentences(content):
27
- content_new = re.sub(r'[\n]+', ' ', content) # keep only one \n
28
- content_new = re.sub(r'e\.g\.' , 'eg', content_new)
29
- content_new = re.sub(r'i\.e\.' , 'eg', content_new)
30
- content_new = re.sub(r'etc\.' , 'etc', content_new)
31
- content_new = re.sub(r' +', ' ', content_new)
32
- sentences = [sentence + '.' for sentence in content_new.split('.')]
33
- citing_sentences = [s for s in sentences if '\\cite' in s]
34
- results = {}
35
- for s in citing_sentences:
36
- citations = retrieve_text_cite(s, 'cite')
37
- final_citations = []
38
- for cite in citations:
39
- final_citations.extend(cite.split(','))
40
- results[s] = final_citations
41
- return results
42
-
43
- def get_intro(content):
44
- sections = retrieve_text_cite(content, 'section')
45
- if sections == []:
46
- return ''
47
- try_intro = [x for x in sections if x.strip().lower() == 'introduction']
48
- if try_intro == []:
49
- return ''
50
- else:
51
- to_find = try_intro[0]
52
- ind = sections.index(to_find)
53
- if ind + 1 < len(sections):
54
- start_marker = f'\\section{{{sections[ind]}}}'
55
- end_marker = f'\\section{{{sections[ind+1]}}}'
56
- start_point = content.find(start_marker)
57
- end_point = content.find(end_marker)
58
- return content[start_point+len(start_marker):end_point]
59
- else:
60
- return ''
61
-
62
- def get_related_works(content):
63
- sections = retrieve_text_cite(content, 'section')
64
- if sections == []:
65
- return ''
66
- possible_related = [
67
- "Literature Review",
68
- "Related Work",
69
- "Related Works",
70
- "Prior Work",
71
- "Prior Works",
72
- "Related Research",
73
- "Research Overview",
74
- "Previous Work",
75
- "Previous Works",
76
- "Review of the Literature",
77
- "Review of Related Literature",
78
- "Survey of Related Work",
79
- "Survey of Related Works",
80
- "Background",
81
- "Research Background",
82
- "Review of Prior Research",
83
- "Literature Survey",
84
- "Overview of Literature",
85
- "Existing Literature",
86
- "Review of Existing Work",
87
- "Review of Existing Works",
88
- "Review of Previous Studies",
89
- "Review of Prior Literature",
90
- "Summary of Related Research",
91
- "Survey of Existing Literature",
92
- "Survey of Literature",
93
- "Existing Research Overview",
94
- "Prior Literature Review"
95
- ]
96
- possible_sections = [x for x in sections if any([True for y in possible_related if y.lower() == x.strip().lower()])]
97
- if possible_sections == []:
98
- return ''
99
- else:
100
- to_find = possible_sections[0]
101
- ind = sections.index(to_find)
102
-
103
- if ind + 1 < len(sections):
104
- start_marker = f'\\section{{{sections[ind]}}}'
105
- end_marker = f'\\section{{{sections[ind+1]}}}'
106
- start_point = content.find(start_marker)
107
- end_point = content.find(end_marker)
108
- return content[start_point+len(start_marker):end_point]
109
-
110
- else:
111
- return ''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/latexpand DELETED
@@ -1,713 +0,0 @@
1
- #!/usr/bin/perl
2
- # Inspired by latexpand by D. Musliner, University of Michigan
3
- # 2012-2023: Matthieu Moy <git@matthieu-moy.fr>
4
- # BSD License
5
-
6
- use strict;
7
- use Cwd;
8
- use Getopt::Long;
9
- use IO::Handle;
10
- use File::Spec;
11
-
12
- my $TEXINPUTS = $ENV{'TEXINPUTS'};
13
- # By default, search in current directory. We use '.' and not getcwd()
14
- # to avoid issues if the working directory contains a ':' character.
15
- if (!$TEXINPUTS) { $TEXINPUTS = '.'; }
16
-
17
- my $verbose;
18
- my $keep_comments;
19
- my $keep_includes;
20
- my $empty_comments;
21
- my $help;
22
- my $long_help;
23
- my %defines = ();
24
- my $output;
25
- my $explain;
26
- my $show_graphics;
27
- my $graphics_extensions = ":.pdf:.png:.jpg:.eps";
28
- my $expand_usepackage;
29
- my $expand_bbl;
30
- my $biber;
31
- my $fatal;
32
- my $version;
33
- my $makeatletter;
34
- my $inside_import;
35
- my $in_enc = "bytes";
36
- my $out_enc = "bytes";
37
-
38
- GetOptions (
39
- 'h' => \$help,
40
- 'help' => \$long_help,
41
- 'verbose|v' => \$verbose,
42
- 'keep-comments' => \$keep_comments,
43
- 'keep-includes' => \$keep_includes,
44
- 'empty-comments' => \$empty_comments,
45
- 'define|d=s%' => \%defines,
46
- 'output|o=s' => \$output,
47
- 'explain' => \$explain,
48
- 'show-graphics' => \$show_graphics,
49
- 'graphics-extensions' => \$graphics_extensions,
50
- 'expand-usepackage' => \$expand_usepackage,
51
- 'expand-bbl=s' => \$expand_bbl,
52
- 'biber=s' => \$biber,
53
- 'fatal' => \$fatal,
54
- 'version' => \$version,
55
- 'makeatletter' => \$makeatletter,
56
- 'in-encoding=s' => \$in_enc,
57
- 'out-encoding=s' => \$out_enc,
58
- ) or pod2usage_wrapper(2);
59
- version() if $version;
60
- pod2usage_wrapper(0) if $help;
61
- pod2usage_wrapper(-exitstatus => 0, -output => \*STDOUT, -verbose => 2) if $long_help;
62
-
63
- sub pod2usage_wrapper
64
- {
65
- # Like pod2usage, but fall back to a simpler implem in case
66
- # pod2usage can't be found.
67
- if (eval {require Pod::Usage;1;} ne 1) {
68
- print "Please install perldoc and Pod::Usage to get proper help.\n";
69
- my $started = 0;
70
- open (my $in, '<', "$0") or die $!;
71
- while (<$in>) {
72
- if ($started) {
73
- print;
74
- }
75
- if (/^__END__$/) {
76
- $started = 1;
77
- }
78
- }
79
- } else {
80
- Pod::Usage->import();
81
- pod2usage(@_);
82
- }
83
- }
84
-
85
- sub get_version
86
- {
87
- # $VERSION's value will be substituted by 'make dist', but the
88
- # next line won't (the string has to be broken to avoid it).
89
- my $VERSION = 'v1.7.2';
90
- if ($VERSION eq '@LATEXPAND' . '_VERSION@') {
91
- my($vol,$dir,$file) = File::Spec->splitpath($0);
92
- chdir($dir);
93
- $VERSION = `git describe --tags HEAD 2>/dev/null`;
94
- }
95
- if ($VERSION eq '') {
96
- $VERSION = '<unknown version>';
97
- }
98
- $VERSION =~ s/^\s+|\s+$//g;
99
- return $VERSION;
100
- }
101
-
102
- sub version
103
- {
104
- print "latexpand version ". get_version() .".\n";
105
- exit(0);
106
- }
107
-
108
- my $nl = "";
109
- if ($empty_comments) {
110
- $nl = "%\n";
111
- }
112
-
113
- if ($output && $output ne "-") {
114
- open (my $OUTPUT, '>', "$output") or die $!;
115
- STDOUT->fdopen(\*$OUTPUT, 'w') or die $!;
116
- }
117
-
118
- sub say
119
- {
120
- if ($verbose) {
121
- print STDERR "$_[0]";
122
- }
123
- }
124
-
125
- my $makeatletter_found;
126
- my $in_preamble;
127
-
128
- use open IN => ":$in_enc", OUT => ":$out_enc";
129
-
130
- foreach my $file (@ARGV)
131
- {
132
- say "processing $file\n";
133
- $makeatletter_found = 0;
134
- $in_preamble = 1;
135
- $inside_import = "";
136
- if ($file =~ /\.bib$/) {
137
- warn "WARNING: latexpand is not meant to be used on BibTeX files like '$file'.\n" .
138
- " Run latexpand on your main .tex file, using '--expand-bbl FILE'\n" .
139
- " or '--biber FILE' if needed to inline the generated bbl file.\n";
140
- } elsif (not $file =~ /\.tex$/) {
141
- warn "WARNING: latexpand is meant to be used on .tex files, which $file isn't.\n";
142
- }
143
- process_file($file, " ");
144
- }
145
-
146
- sub cat_file
147
- {
148
- my $file = shift;
149
- open (my $INFILE, "<", $file) || die "could not open input file '$file'\n";
150
- while (<$INFILE>) {
151
- print;
152
- }
153
- close ($INFILE);
154
- }
155
-
156
- sub process_file
157
- {
158
- my $file = shift;
159
- my $prefix = (shift || "");
160
- my $in_comment = 0;
161
- open(my $FILE, "<", $file) or die "could not open input file '$file'\n";
162
- my $commented_newline = 0;
163
- while (my $line = <$FILE>) {
164
- if ($line =~ /^[ \t]*\\endinput/) {
165
- # Surprisingly, text after \endinput on the
166
- # same line is kept in output. Also, add a
167
- # space (before %), automatically inserted by
168
- # TeX at the end of file.
169
- $line =~ s/\\endinput(.*)\n?/$1 % /;
170
- $in_comment = 1;
171
- process_line($line, $prefix, \$commented_newline);
172
- last;
173
- }
174
- while (my ($k, $v) = each (%defines))
175
- {
176
- $line=~s!\\$k!$v!g;
177
- }
178
- process_line($line, $prefix, \$commented_newline, $file);
179
- if ($line =~ /^%.*[^\n]\z/ || $line =~ /[^\\]%.*[^\n]\z/) {
180
- # file ends with a comment not ending with a newline
181
- print "\n";
182
- }
183
- # Garbage at end of line after \end{document} is
184
- # ignored by LaTeX, but we don't allow anything before
185
- # to avoid e.g. \verb|\end{document}| from terminating
186
- # the file.
187
- if (!$keep_comments && $line =~ /^[ \t]*\\end\{document\}/) {
188
- last;
189
- }
190
- }
191
- close($FILE);
192
- return $in_comment;
193
- }
194
-
195
- sub process_line
196
- {
197
- my ($line, $prefix, $commented_newline, $file) = @_;
198
- $_ = $line;
199
- if ($$commented_newline) {
200
- # Leading whitespaces after a comment is ignored.
201
- # There's no space in:
202
- # Line 1%
203
- # Line 2.
204
- # Match just space and tabs (\s would match \n)
205
- s/^[ \t]*//;
206
- if (/^$/) {
207
- # Deal with:
208
- #
209
- # Line 1 % comment
210
- #
211
- # Line 2
212
- #
213
- # The newline after Line 1 is commented, but we still
214
- # want a new paragraph. We strip the comment together
215
- # with its newline, but re-add a newline to chnge
216
- # paragraph here if needed:
217
- print "\n";
218
- }
219
- }
220
- $$commented_newline = 0;
221
- # Consider \makeatletter only in preamble, because we do want
222
- # to warn on \someCommand{\makeatletter\command@with@arobase}.
223
- if ($in_preamble && /^[^%]*\\makeatletter/) {
224
- $makeatletter_found = 1;
225
- }
226
- if ($in_preamble && /^[^%]*\\makeatother/) {
227
- $makeatletter_found = 0;
228
- }
229
- my $command;
230
- if (!$makeatletter && !$makeatletter_found
231
- && (($command) = /^[^%]*(\\[[:alpha:]]*@[[:alpha:]]*)/)
232
- && ($command ne '\@')) {
233
- print STDERR "Warning: command $command containing @ found in\n";
234
- print STDERR "Warning: $file.\n";
235
- print STDERR "Warning: consider using --makeatletter if the result is not compilable.\n";
236
- }
237
-
238
- # non-comment is a sequence of:
239
- # - escaped character (\\.), including \% and \\
240
- # - neither '%' nor '\'.
241
- my $NON_COMMENT = '([^\\\\%]|\\\\.)*';
242
-
243
- unless ($keep_comments) {
244
- # Special-case for \url{} commands, which may contain '%'
245
- # characters. It's hard to catch them in $NON_COMMENT since we'd
246
- # need a regexp so that "\url{foo" can't match as non-comment in
247
- # the line \url{foo%bar}, but "\url{foo%bar}" would match.
248
- # Escaping these '%' is not mandatory, but allowed, hence we can
249
- # pre-process the line by escaping them, and let latexpand work
250
- # as normal afterwards.
251
- # Known limitation: latexpand doesn't do balanced braces
252
- # recognition, and just refuses both { and } within \url{}
253
- # argument for %-detection to work ([^{}%] below). Fix should be
254
- # possible using
255
- # https://stackoverflow.com/questions/15301708/perl-regular-expression-match-nested-brackets
256
- # but is it worth the trouble? (file an issue or send a merge
257
- # request if you think it is)
258
-
259
- # While there are \url{URL} with unescaped % in URL ...
260
- my $NON_PERCENT = '([^\\}]%|[^{}%])*';
261
- while (/^(?<before>.*\\url\{)(?<url>$NON_PERCENT[^\\}]%$NON_PERCENT)(?<after>\}.*)$/) {
262
- my ($before, $url, $after) = ($+{before}, $+{url}, $+{after});
263
- # escape unescaped % in URL, if any
264
- $url =~ s/([^\\])%/$1\\%/g;
265
- $_ = $before . $url . $after ."\n";
266
- }
267
- if (!$empty_comments) {
268
- # Include \n in pattern to avoid matching
269
- # comments at end of files
270
-
271
- # remove comments + whitespace-only lines completely
272
- if (s/^\s*%.*\n//) {
273
- $$commented_newline = 1;
274
- }
275
-
276
- # Special-case commands at end of line. We
277
- # don't want "\\foo%\nbar" to become
278
- # "\\foobar" (but we still want \@% to result
279
- # in no space!)
280
- if (s/^($NON_COMMENT\\([[:alpha:]]|[[:alpha:]@]{2,}))%.*\n/$1 /) {
281
- $$commented_newline = 1;
282
- } elsif (s/^($NON_COMMENT)%.*\n/$1/) {
283
- # remove only the comment if the line has actual content
284
- $$commented_newline = 1;
285
- }
286
- }
287
- # Apply the "empty comments" treatment unconditionally
288
- # for comments not matched above (it doesn't harm to
289
- # keep an empty comment sometimes, but it may harm to
290
- # leave a real comment if the goal was to strip them).
291
- s/^(([^\\%]|\\.)*)%.*$/$1%/;
292
- }
293
-
294
- unless ($keep_includes) {
295
- # \input{foo.tex}
296
- my $ARGBRACES = '\{\\s*([^"}\\s][^}]*)(\\s*)\}';
297
- # \input{"foo bar.tex"}
298
- my $ARGQUOTED = '\{\\s*"([^"]*)"(\\s*)\}';
299
- # \input foo.tex
300
- my $ARGSPACES = '\\s([^\{\\s][^\\s]+?)\\s()';
301
- my $ARGUMENT = "\\s*?(?|$ARGBRACES|$ARGQUOTED|$ARGSPACES)";
302
-
303
- if (my ($before, $ignored, $full_filename, $trailing, $after)
304
- = /^($NON_COMMENT)\\include$ARGUMENT(.*)$/) {
305
- $full_filename = find_tex_file($full_filename . ".tex");
306
- if ($full_filename) {
307
- say $prefix . "Found include for file: $full_filename\n";
308
- print $before . $nl;
309
- print '\clearpage{}' . $nl;
310
- print "% start include $full_filename\n" if ($explain);
311
- my $in_comment = process_file($full_filename, $prefix . " ");
312
- if ($explain) {
313
- print " % end include $full_filename\n";
314
- } elsif ($in_comment) {
315
- print "\n";
316
- }
317
- print '\clearpage{}' . $nl;
318
- print $nl . $after . "\n";
319
- $_ = "";
320
- }
321
- } elsif (my ($before, $ignored, $full_filename, $trailing, $after)
322
- = /^($NON_COMMENT)\\input$ARGUMENT(.*)$/) {
323
- if ($inside_import) {
324
- $full_filename = $inside_import . $full_filename;
325
- }
326
- $full_filename = find_tex_file($full_filename, ":.tex");
327
- if ($full_filename) {
328
- say $prefix . "Found input for file: $full_filename\n";
329
- # Apparently, in some versions of LaTeX, a space
330
- # after filename in \input{foo.tex } is inserted
331
- # _before_ the inclusion. That was the case for
332
- # me when 31fa806 (deal with space after
333
- # filename in \input and \include, 2019-12-11)
334
- # was written, but is not anymore, hence we just
335
- # throw $trailing away.
336
- print $before . $nl;
337
- print "% start input $full_filename\n" if ($explain);
338
- my $in_comment = process_file($full_filename, $prefix . " ");
339
- if ($explain) {
340
- print " % end input $full_filename\n";
341
- } elsif ($in_comment) {
342
- print "\n";
343
- }
344
- if ($after =~ /[^\s]/) {
345
- # LaTeX produces this space, so let's do it also
346
- print " " . $nl . $after . "\n";
347
- } else {
348
- print " ";
349
- }
350
- $_ = "";
351
- }
352
- } elsif (my ($before, $ignored, $dir, $ignored, $full_filename, $ignored, $after)
353
- = /^($NON_COMMENT)\\(?:sub)?import$ARGUMENT$ARGUMENT(.*)$/) {
354
- if ($explain) {
355
- print "% dir " . $dir ."\n";
356
- print "% full_filename " . $full_filename ."\n";
357
- print "% after " . $after ."\n";
358
- print "% inside_import $inside_import\n";
359
- }
360
- $full_filename = $dir . $full_filename;
361
- if ($inside_import) {
362
- $full_filename = $inside_import . $full_filename;
363
- }
364
- print "% cat(inside_import,dir,full_filename) " . $full_filename ."\n" if ($explain);
365
- $full_filename = find_tex_file($full_filename, ":.tex");
366
- if ($full_filename) {
367
- say $prefix . "Found input for file: $full_filename\n";
368
- print $before . $nl;
369
- print "% start input $full_filename\n" if ($explain);
370
- my $previous_import_dir = $inside_import;
371
- $inside_import = $inside_import . $dir;
372
- my $in_comment = process_file($full_filename, $prefix . " ");
373
- $inside_import = $previous_import_dir;
374
- if ($explain) {
375
- print " % end input $full_filename\n";
376
- } elsif ($in_comment) {
377
- print "\n";
378
- }
379
- if ($after =~ /[^\s]/) {
380
- # LaTeX produces this space, so let's do it also
381
- print " " . $nl . $after . "\n";
382
- } else {
383
- print " ";
384
- }
385
- $_ = "";
386
- }
387
- } elsif (my ($before, $ignored, $args, $full_filename, $ignored, $after)
388
- = /^($NON_COMMENT)\\includegraphics(\[[^\]]*?\]|)$ARGUMENT(.*)$/) {
389
- if ($explain) {
390
- print "% inside_import " . $inside_import ."\n";
391
- print "% before " . $before ."\n";
392
- print "% ignored " . $ignored ."\n";
393
- print "% args " . $args ."\n";
394
- print "% full_filename " . $full_filename ."\n";
395
- print "% after " . $after ."\n";
396
- }
397
- if ($inside_import) {
398
- $full_filename = $inside_import . $full_filename;
399
- print "$before\\includegraphics" . "$args" . "{$full_filename}$after\n";
400
- $_ = "";
401
- }
402
- } elsif (my ($before, $ignored, $args, $full_filename, $ignored, $after)
403
- = /^($NON_COMMENT)\\lstinputlisting(\[[^\]]*?\]|)$ARGUMENT(.*)$/) {
404
- if ($explain) {
405
- print "% inside_import " . $inside_import ."\n";
406
- print "% before " . $before ."\n";
407
- print "% ignored " . $ignored ."\n";
408
- print "% args " . $args ."\n";
409
- print "% full_filename " . $full_filename ."\n";
410
- print "% after " . $after ."\n";
411
- }
412
- if ($inside_import) {
413
- $full_filename = $inside_import . $full_filename;
414
- print "$before\\lstinputlisting" . "$args" . "{$full_filename}$after\n";
415
- $_ = "";
416
- }
417
- }
418
- }
419
- if ($expand_usepackage) {
420
- # Don't bother with before and after text, we just require the
421
- # usepackage to be alone on its line.
422
- if (my ($package_name) = /^\s*\\usepackage\{([^\}]*)\}\s*(%.*)?$/) {
423
- my $full = find_file($package_name . ".sty", $TEXINPUTS);
424
- if ($full) {
425
- say $prefix . "Found package file: $full\n";
426
- process_file($full, $prefix . " ");
427
- $_ = "";
428
- # Forget about any commented newline
429
- # before the \usepackage:
430
- $$commented_newline = 0;
431
- } else {
432
- say $prefix . "Not including external package $package_name\n";
433
- }
434
- }
435
- }
436
- if ($expand_bbl) {
437
- if (my ($before, $bib_name, $after)
438
- = /^(.*)\\(?:bibliography|bibselect)\{([^\}]*)\}(.*)$/) {
439
- # The BBL file is not necessarily $bib_name.
440
- # Take it from the command-line.
441
- print $before . $nl;
442
- say $prefix . "Expanding BBL file: $expand_bbl\n";
443
- process_file($expand_bbl, $prefix . " ");
444
- print " " . $nl . $after . "\n";
445
- $_ = "";
446
- }
447
- }
448
- if ($biber) {
449
- if (my ($before, $after)
450
- = /^(.*)\\(?:addbibresource)\{[^\}]*\}(.*)$/) {
451
- # See https://tex.stackexchange.com/questions/166518/biblatex-include-bbl-problem-with-verb-field/166526#166526
452
- my $biber_noext = $biber;
453
- $biber_noext =~ s/.bbl//;
454
- print $before . $nl;
455
- say $prefix . "Expanding Biber BBL file: $biber\n";
456
- print '\begin{filecontents*}{' . $biber . '}' . "\n";
457
- cat_file($biber);
458
- print "\n";
459
- print '\end{filecontents*}
460
-
461
- \usepackage{xpatch}
462
-
463
- %Patch the biblatex input command.
464
- %replace "testinput-bbl" if you change the name above.
465
- %disable if you want to run biblatex/biber normally
466
- \makeatletter
467
- \patchcmd\blx@bblinput{\blx@blxinit}
468
- {\blx@blxinit
469
- \def\jobname{' . $biber_noext . '}%new jobname
470
- }{}{\fail}
471
- \makeatother
472
- ';
473
- say $prefix . "End expansion of Biber BBL file: $biber\n";
474
- print " " . $nl . $after . "\n";
475
- $_ = "";
476
- }
477
- }
478
- if ($show_graphics) {
479
- if (/\\includegraphics(\[[^\]]*\])?{([^}]*)}/) {
480
- my $full_filename = $2;
481
- if ($inside_import) {
482
- $full_filename = $inside_import . $full_filename;
483
- }
484
- my $full = find_tex_file($full_filename, $graphics_extensions);
485
- say $prefix . "needs graphics file: ";
486
- print STDERR "$full\n";
487
- }
488
- }
489
- if (/^[ \t]*\\begin\{document\}/) {
490
- $in_preamble = 0;
491
- if ($makeatletter) {
492
- print '\makeatletter' . $nl;
493
- }
494
- }
495
- print;
496
- }
497
-
498
- sub unquote
499
- {
500
- my $str = shift;
501
- my $x = substr($str, 0, 1);
502
- my $y = substr($str, -1, 1);
503
- if ($x eq $y && ($x eq '"' || $x eq "'")) {
504
- $str = substr($str, 1, -1);
505
- }
506
- # There's a weird LaTeX syntax: \include{"file\space
507
- # with\space spaces"}, so remove these \space when unquoting.
508
- $str =~ s/\\space / /g;
509
- return $str;
510
- }
511
-
512
- # search $1 in $TEXINPUTS, with possible extensions in $2
513
- sub find_tex_file
514
- {
515
- my $file = unquote(shift);
516
- my $extensions = (shift || ":");
517
- foreach my $ext (split(':', $extensions, -1)) {
518
- my $full = find_file_global($file . $ext);
519
- if ($full) {
520
- return $full;
521
- }
522
- }
523
- if ($fatal) {
524
- die "ERROR: Could not find file [$file]\n";
525
- } else {
526
- print STDERR "Warning: Could not find file [$file]\n";
527
- return;
528
- }
529
- }
530
-
531
- sub find_file_global
532
- {
533
- my $file = shift;
534
- if (open(my $fh, "-|", "kpsewhich", $file)) {
535
- my $full = <$fh>;
536
- $full =~ s/\s+$//;
537
- close($fh);
538
- if ($full) {
539
- return $full;
540
- }
541
- }
542
- # Should be useless, but fall-back in case kpsewhich fails (or is not installed, or ...):
543
- return find_file($file, $TEXINPUTS);
544
- }
545
-
546
- # Find files, not searching for global files (to allow not expanding global .sty packages)
547
- sub find_file
548
- {
549
- my ($file, $path) = @_;
550
- if (File::Spec->file_name_is_absolute($file)) {
551
- if (-e "$file" && ! -d "$file") {
552
- return $file;
553
- } else {
554
- return;
555
- }
556
- }
557
-
558
- # TEXINPUTS=...: (trailing :) means "append default search
559
- # directories". We don't want global directories here, but
560
- # still add . that may be needed.
561
- if (substr($path, -1) eq ':') {
562
- $path .= '.';
563
- }
564
- foreach my $dir (split(':', $path)) {
565
- if (-e "$dir/$file" && ! -d "$dir/$file") {
566
- return("$dir/$file");
567
- }
568
- }
569
- return;
570
- }
571
-
572
-
573
- __END__
574
-
575
- =head1 NAME
576
-
577
- latexpand - Flatten LaTeX file by expanding \include and \input, ... and remove comments
578
-
579
- =head1 SYNOPSIS
580
-
581
- latexpand [options] FILE...
582
-
583
- =head2 Options:
584
-
585
- --verbose show what's going on
586
- --keep-comments don't strip comments (comments are lines
587
- starting with %, and anything below
588
- \end{document})
589
- --empty-comments keep empty comments (i.e. % at end of lines) for clarity
590
- --keep-includes don't expand \input and \include directives
591
- --expand-usepackage
592
- Expand \usepackage{...} directives if the
593
- corresponding .sty file is found in
594
- $TEXINPUTS (or the current directory if
595
- $TEXINPUTS is not set)
596
- --expand-bbl FILE
597
- Expand the bibliography by inlining FILE
598
- (should be a *.bbl file)
599
- --biber FILE Include \bibliography{} with FILE's content,
600
- as needed by biblatex with the biber backend.
601
- (similar to --expand-bbl FILE, but for
602
- biber+biblatex).
603
- --help this help message
604
- --define <key>=<val>, -d <key>=<val>
605
- defines a macro key to be replaced by value, e.g.,
606
- when called with -d foo=bar would replace all occurences
607
- of \foo in the code with bar. Can be supplied multiple times.
608
- --output <file>, -o <file>
609
- generate output in <file>
610
- --explain generate explanatory comments in output
611
- --show-graphics show included graphics
612
- --graphics_extensions
613
- colon-separated list of possible graphics extensions
614
- (used by --show-graphics to find the actual graphics files)
615
- --fatal Die in case a file can't be found.
616
- --makeatletter Insert a \makeatletter in the preamble. In some
617
- rare cases it may break your document, but it
618
- may help fixing bad interactions between
619
- @-commands and inclusion (see BUGS section).
620
- --in-encoding FMT, --out-encoding FMT
621
- File encoding used by input and output files.
622
- This uses the same syntax as PerlIO's layers.
623
- Example:
624
- --in-encoding 'encoding(UTF-8)'
625
- The default is 'bytes' and should always work.
626
-
627
- =head1 USES
628
-
629
- The most common use of latexpand is to simplify distribution of source
630
- LaTeX files, typically to satisfy the requirement of editors and
631
- archival sites (springer, arXiv.org, ...) who force the authors to
632
- submit sources. One does not necessarily want to submit sources with
633
- comments, and uploading a document made of several files including
634
- each other is a bit painful. By default, latexpand answers both
635
- problems by outputing a single LaTeX file that contain no comment.
636
-
637
- =head1 GETTING LATEXPAND
638
-
639
- The latest version of latexpand is available here:
640
-
641
- https://gitlab.com/latexpand/latexpand
642
-
643
- Versions are uploaded to ctan.org from time to time:
644
-
645
- http://www.ctan.org/pkg/latexpand
646
-
647
- =head1 BUGS
648
-
649
- Please, report bugs on the issue tracker on the project site:
650
-
651
- https://gitlab.com/latexpand/latexpand/issues
652
-
653
- =head2 Known bugs
654
-
655
- =head3 Verbatim
656
-
657
- latexpand currently ignores \begin{verbatim} ... \end{verbatim}, and
658
- will therefore process any \include, \input, ... directives that
659
- appear within verbatim environments (while it shouldn't).
660
-
661
- LaTeX comments inside verbatim environments are also incorrectly
662
- stripped. You can use --keep-comments as a workaround to avoid this.
663
-
664
- =head3 Comment environment
665
-
666
- It would be nice to remove code between \begin{comment} and
667
- \end{comment} too if \usepackage{comment} is used.
668
-
669
- Code like
670
-
671
- foo%
672
- \begin{comment}
673
-
674
- will produce the incorrect
675
-
676
- foo\begin{comment}
677
-
678
- A workaround is to use --empty-comments when such tricky usage of the
679
- comments package is done.
680
-
681
- =head3 \makeatletter and use with transfig/xfig with \scalebox{}
682
-
683
- If \input{} or \include{} appears as argument to a command, and the
684
- file included contains \makeatletter, then after expansion, the
685
- \makeatletter and the @-command appear as argument to the command,
686
- which is forbidden because the argument is parsed (and the @-command
687
- badly tokenized) before being executed.
688
-
689
- This happens with
690
-
691
- \scalebox{ \input{file-generated-by-xfig.pdf_t} }
692
-
693
- Workaround: add \makeatletter before the scalebox manually in your
694
- code, like
695
-
696
- \makeatletter{}
697
- \scalebox{ \input{file-generated-by-xfig.pdf_t} }
698
- \makeatother{}
699
-
700
- In the case of xfig generated files, it is necessary only for the
701
- first occurrence.
702
-
703
- A more brute-force workaround is to use latexpand --makeatletter.
704
-
705
- =head1 SEE ALSO
706
-
707
- Instructions to include only the relevant .bib items (french):
708
-
709
- https://lacl.fr/~caubert/notes/portabilite-du-tex.html#dependances
710
-
711
- =head1 VERSION
712
-
713
- This is latexpand version v1.7.2.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/utils.py DELETED
@@ -1,701 +0,0 @@
1
- import sys
2
- import regex
3
- import yaml
4
- import shutil
5
- import bibtexparser
6
- from charset_normalizer import from_path
7
- from langdetect import detect
8
- import os
9
- import subprocess
10
- import numpy as np
11
- import networkx as nx
12
- import re
13
-
14
-
15
def is_venv():
    """Return True when the interpreter runs inside a virtual environment.

    Covers both the legacy virtualenv marker (``sys.real_prefix``) and the
    PEP 405 venv convention (``base_prefix`` differing from ``prefix``).
    """
    legacy_virtualenv = hasattr(sys, 'real_prefix')
    pep405_venv = hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix
    return legacy_virtualenv or pep405_venv
18
-
19
def read_yaml_file(file_path):
    """Parse a YAML file and return the loaded data.

    On a YAML syntax error the error is printed and None is returned
    (best-effort behavior, the caller is expected to cope with None).
    """
    with open(file_path, 'r') as handle:
        try:
            return yaml.safe_load(handle)
        except yaml.YAMLError as e:
            print(f"Error reading YAML file: {e}")
26
-
27
def read_tex_file(file_path):
    """Return the entire contents of a UTF-8 encoded TeX file as one string."""
    with open(file_path, 'r', encoding='utf-8') as handle:
        return handle.read()
31
-
32
def write_tex_file(file_path, s):
    """Write string ``s`` to ``file_path`` using UTF-8, replacing any existing file."""
    with open(file_path, 'w', encoding='utf-8') as handle:
        handle.write(s)
35
-
36
def get_core(s):
    """Return the document body between \\begin{document} and the last \\end{document}."""
    open_tag = '\\begin{document}'
    close_tag = '\\end{document}'
    # First \begin{document}, last \end{document}: anything outside is preamble/trailer.
    body_start = s.find(open_tag) + len(open_tag)
    body_end = s.rfind(close_tag)
    return s[body_start:body_end]
42
-
43
-
44
def retrieve_text(text, command, keep_text=False):
    r"""Locate occurrences of '\command[..]{...}[..]' in `text`.

    Parameters
    ----------
    text : str
        LaTeX source to scan.
    command : str
        Command name (without the backslash), interpolated into a regex.
    keep_text : bool
        If True, return the brace-delimited argument of the first occurrence
        (None when there is none).  If False, remove every occurrence and
        return the modified text; when nothing but whitespace follows an
        occurrence up to the next newline, a '%' is left behind so the line
        structure of the source is preserved.

    Regex `base_pattern` used to match balanced parentheses taken from:
    https://stackoverflow.com/questions/546433/regular-expression-to-match-balanced-parentheses/35271017#35271017

    Bug fix: the original fell off the end and returned None in the
    keep_text=False case, silently discarding all the substitutions; the
    cleaned text is now returned.  keep_text=True still returns None when
    there is no match, so existing callers are unaffected.
    """
    base_pattern = (
        r'\\' + command + r"(?:\[(?:.*?)\])*\{((?:[^{}]+|\{(?1)\})*)\}(?:\[(?:.*?)\])*"
    )

    def extract_text_inside_curly_braces(text):
        """Extract text inside of {} from command string (recursive-brace aware)."""
        pattern = r"\{((?:[^{}]|(?R))*)\}"
        match = regex.search(pattern, text)
        if match:
            return match.group(1)
        else:
            return ""

    # Loops in case of nested commands that need to retain text, e.g. \red{hello \red{world}}.
    while True:
        all_substitutions = []
        has_match = False
        for match in regex.finditer(base_pattern, text):
            # In case there are only spaces or nothing up to the following newline,
            # adds a percent, not to alter the newlines.
            has_match = True

            if not keep_text:
                new_substring = ""
            else:
                temp_substring = text[match.span()[0] : match.span()[1]]
                return extract_text_inside_curly_braces(temp_substring)

            if match.span()[1] < len(text):
                next_newline = text[match.span()[1] :].find("\n")
                if next_newline != -1:
                    text_until_newline = text[
                        match.span()[1] : match.span()[1] + next_newline
                    ]
                    if (
                        not text_until_newline or text_until_newline.isspace()
                    ) and not keep_text:
                        new_substring = "%"
            all_substitutions.append((match.span()[0], match.span()[1], new_substring))

        # Apply right-to-left so earlier spans keep valid offsets.
        for start, end, new_substring in reversed(all_substitutions):
            text = text[:start] + new_substring + text[end:]

        if not keep_text or not has_match:
            break

    # keep_text=True with no match keeps the historical None; otherwise return
    # the cleaned text (previously lost).
    return None if keep_text else text
97
-
98
-
99
def reduce_linebreaks(s):
    """Collapse any run of two or more (space/tab-padded) newlines into one blank line."""
    multi_break = r'(\n[ \t]*)+(\n[ \t]*)+'
    return re.sub(multi_break, '\n\n', s)
101
-
102
-
103
def replace_percentage(s):
    """Drop a line-terminating '%' (plus trailing spaces), keeping the newline itself."""
    trailing_comment_marker = r'% *\n'
    return re.sub(trailing_comment_marker, '\n', s)
105
-
106
-
107
def reduce_spaces(s):
    """Squeeze every run of consecutive spaces down to a single space."""
    return re.sub(' {2,}| ', lambda m: ' ', s) if False else re.sub(' +', ' ', s)
109
-
110
-
111
def delete_urls(s):
    """Remove every http/https URL (any non-space run starting with 'http')."""
    url_pattern = r'http\S+'
    return re.sub(url_pattern, '', s)
113
-
114
-
115
def remove_tilde(s):
    """Tidy spacing artifacts: '~.'/' .' -> '.', '~,'/' ,' -> ',', and drop empty '{}'."""
    without_space_dot = re.sub(r'[~ ]\.', '.', s)
    without_space_comma = re.sub(r'[~ ],', ',', without_space_dot)
    return re.sub(r'{}', '', without_space_comma)
119
-
120
-
121
def remove_verbatim_words(s):
    """Strip LaTeX commands listed in configs/latex_commands.yaml from ``s``.

    The config drives several phases: literal deletions, commands with two
    or three simple brace arguments, nested-brace variants (delegated to
    remove_multargument), commands rewritten to comment markers, and finally
    the abstract environment is turned into a plain 'Abstract' heading.
    """
    with open("configs/latex_commands.yaml", "r") as stream:
        cfg = yaml.safe_load(stream)

    # One brace-delimited argument with no nested braces.
    simple_arg = r'{[^}]*}'

    for literal in cfg['verbatim_to_delete']:
        s = s.replace(literal, '')

    for name in cfg['two_arguments']:
        s = re.sub(r'\\' + name + simple_arg * 2, '', s)

    for name in cfg['three_arguments']:
        s = re.sub(r'\\' + name + simple_arg * 3, '', s)

    # "Elaborate" commands may contain nested braces, so they need a scanner.
    for name in cfg['two_arguments_elaborate']:
        s = remove_multargument(s, '\\' + name, 2)

    for name in cfg['three_arguments_elaborate']:
        s = remove_multargument(s, '\\' + name, 3)

    for name in cfg['replace_comments']:
        s = re.sub(r'\\' + name, '%', s)

    s = re.sub(
        r'\\end{[\s]*abstract[\s]*}',
        '',
        s,
        flags=re.IGNORECASE
    )

    s = re.sub(
        r'\\begin{[\s]*abstract[\s]*}',
        'Abstract\n\n',
        s,
        flags=re.IGNORECASE
    )
    return s
160
-
161
-
162
def yes_or_no(s):
    """Map an answer string to 1 (starts with 'Yes'), 0 (starts with 'No'), else -1."""
    if s[0:3] == "Yes":
        return 1
    if s[0:2] == "No":
        return 0
    return -1
164
-
165
-
166
def get_main(directory):
    """Heuristically locate the main .tex file of a LaTeX project tree.

    Builds a directed graph where an edge i -> j means file i \inputs file j,
    then picks a root (a file nobody includes) of the largest weakly connected
    component.  Ties are broken by preferring roots that contain
    \begin{document} and are detected as English, largest file winning.
    Returns None when the tree has no .tex file at all.
    """
    # Collect every file under `directory`, then keep only the .tex ones.
    file_paths = []
    for root, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            file_paths.append(file_path)
    latex_paths = [f for f in file_paths if f.endswith('.tex')]
    number_tex = len(latex_paths)
    if number_tex == 0:
        return None
    if number_tex == 1:
        return latex_paths[0]
    # adjacency[i][j] == 1  <=>  latex_paths[i] includes latex_paths[j].
    adjacency = np.zeros((number_tex, number_tex))
    keys = [os.path.basename(path) for path in latex_paths]
    # NOTE(review): the trailing '|}' alternative matches every bare '}' and
    # yields all-empty tuples; the filter on the next line discards them, so
    # it is harmless but probably unintentional.
    reg_ex = r'\\input{(.*?)}|\\include{(.*?)}|\\import{(.*?)}|\\subfile{(.*?)}|\\include[*]{(.*?)}|}'
    for i,file in enumerate(latex_paths):
        content = read_tex_file(file)
        find_pattern_input = re.findall(reg_ex, content)
        find_pattern_input = [tup for tup in find_pattern_input if not all(element == "" for element in tup)]
        number_matches = len(find_pattern_input)
        if number_matches == 0:
            continue
        else:
            # Normalize \import/\subfile/\include* into \input{...} (rewrites the
            # file on disk as a side effect), then re-scan with the simpler regex.
            content = replace_imports(file, content)
            reg_ex_clean = r'\\input{(.*?)}|\\include{(.*?)}'
            find_pattern_input = re.findall(reg_ex_clean, content)
            number_matches = len(find_pattern_input)
            for j in range(number_matches):
                match = find_pattern_input[j]
                non_empty_match = [t for t in match if t]
                for non_empty in non_empty_match:
                    # Compare by basename only; extensions are normalized to .tex.
                    base_match = os.path.basename(non_empty)
                    if not base_match.endswith('.tex'):
                        base_match = base_match + '.tex'
                    if base_match not in keys:
                        continue
                    ind = keys.index(base_match)
                    adjacency[i][ind] = 1
    G = nx.from_numpy_array(adjacency, create_using=nx.DiGraph)
    connected_components = list(nx.weakly_connected_components(G))
    size_connected = [len(x) for x in connected_components]
    maximum_size = max(size_connected)
    biggest_connected = [x for x in connected_components if len(x) == maximum_size]
    if len(biggest_connected)>1:
        # Several equally large components: a root is a node with no includers.
        roots = [n for connected in biggest_connected for n in connected if not list(G.predecessors(n))]
        _check = []
        for r in roots:
            try:
                _check.append(check_begin(latex_paths[r]))
            except Exception as e:
                # check_begin can fail (e.g. language detection); treat as "not main".
                _check.append(False)
        potentials_files = [latex_paths[x] for x, y in zip(roots, _check) if y == True]
        # Prefer the largest candidate file.
        # NOTE(review): raises ValueError when no root passes check_begin — TODO confirm intended.
        sizes_files = [os.path.getsize(x) for x in potentials_files]
        return potentials_files[sizes_files.index(max(sizes_files))]

    else:
        # Single largest component: return its first root.
        roots = [n for n in biggest_connected[0] if not list(G.predecessors(n))]
        return latex_paths[roots[0]]
224
-
225
-
226
def initial_clean(directory, config):
    """Run arxiv_latex_cleaner on `directory`, with an encoding-repair fallback.

    A backup copy of the tree is made first.  If the cleaner fails, the tree is
    restored from backup, every .tex file is re-encoded to UTF-8 via
    charset_normalizer, and the cleaner is run once more.  The cleaner writes
    its output to a sibling '<dir>_arXiv' directory, which finally replaces
    `directory` in place.

    NOTE(review): the path arithmetic assumes `directory` ends with '/' (as the
    callers in this module pass it); without the trailing slash the '_temp' and
    '_arXiv' siblings would be computed against the parent directory.
    """
    config_cmd = ''
    if config == True:
        config_cmd = '--config configs/cleaning_config.yaml'
    temp_dir = directory[:directory.rfind('/')] + '_temp' + '/'
    # Backup copy so a failed cleaning run can be rolled back.
    shutil.copytree(directory, temp_dir)
    try:
        command_res = os.system('arxiv_latex_cleaner --keep_bib {} {}'.format(directory, config_cmd))
        if command_res != 0:
            raise Exception('Error cleaning')
        else:
            shutil.rmtree(temp_dir)

    except Exception as e:
        # Roll back, normalize encodings, then retry the cleaner (best effort:
        # the retry's exit status is deliberately ignored).
        shutil.rmtree(directory)
        os.rename(temp_dir, directory)
        file_paths = []
        for root, _, files in os.walk(directory):
            for file in files:
                file_path = os.path.join(root, file)
                file_paths.append(file_path)
        latex_paths = [f for f in file_paths if f.endswith('.tex')]
        for p in latex_paths:
            # charset_normalizer guesses the encoding; rewrite as UTF-8.
            results = from_path(p)
            with open(p, 'w', encoding='utf-8') as f:
                f.write(str(results.best()))
        os.system('arxiv_latex_cleaner --keep_bib {} {}'.format(directory, config_cmd))
    # arxiv_latex_cleaner emits its result into '<dir>_arXiv'; swap it in.
    cleaned_directory = directory[:directory.rfind('/')] + '_arXiv'
    shutil.rmtree(directory)
    os.rename(cleaned_directory, directory)
256
-
257
-
258
def check_begin(directory):
    """Return True iff the given .tex file has \\begin{document} and reads as English.

    Note: despite the parameter name, `directory` is a file path.
    """
    content = read_tex_file(directory)
    # Language detection runs unconditionally, mirroring the historical behavior
    # (it may raise on pathological content even without \begin{document}).
    is_english = detect(content) == 'en'
    if re.findall(r'\\begin{document}', content):
        return is_english
    return False
262
-
263
-
264
def post_processing(extracted_dir, file):
    """Run the full flattening/cleaning pipeline on a main .tex file.

    Steps: latexpand-merge all inputs, expand private macros (de-macro),
    resolve \\def macros, normalize operators/titles/citations, run
    arxiv_latex_cleaner, then apply the textual clean-up filters.  The final
    single-file result is written to '<extracted_dir>final_cleaned.tex' and
    that path is returned.  Each optional step is best-effort: on failure the
    pipeline simply continues with the previous intermediate file.
    """
    _dir = os.path.dirname(file) + '/'
    perl_expand(file)
    # perl_expand always writes to this fixed name in the same directory.
    file = _dir + 'merged_latexpand.tex'
    try:
        de_macro(file)
        # de-macro writes '<name>-clean.tex' on success.
        file = _dir + 'merged_latexpand-clean.tex'
    except Exception as e:
        pass
    try:
        def_handle(file)
    except Exception as e:
        pass
    try:
        declare_operator(file) # has additional add-ons
    except Exception as e:
        pass
    try:
        # Second de-macro pass: declare_operator may have rewritten
        # \DeclareMathOperator and friends into \newcommand definitions.
        de_macro(file)
        file = _dir + os.path.splitext(os.path.basename(file))[0] + '-clean' + '.tex'
    except Exception as e:
        pass
    # Two cleaner passes: first with the project config, then with defaults.
    initial_clean(_dir, config=True)
    initial_clean(_dir, config=False)
    tex_content = read_tex_file(file)
    # Innermost filter runs first: verbatim removal, then comment/linebreak/
    # tilde/URL/space normalization.
    final_tex = reduce_spaces(
        delete_urls(
            remove_tilde(
                reduce_linebreaks(
                    replace_percentage(
                        remove_verbatim_words(
                            tex_content
                        )
                    )
                )
            )
        )
    ).strip()
    # Replace the whole working tree with just the final single file.
    shutil.rmtree(extracted_dir)
    os.makedirs(extracted_dir)
    write_tex_file(extracted_dir + 'final_cleaned.tex', final_tex)
    initial_clean(extracted_dir, config=False)
    return extracted_dir + 'final_cleaned.tex'
307
-
308
-
309
def perl_expand(file):
    """Flatten `file` with the bundled latexpand Perl script.

    Copies ./src/utils/latexpand next to `file`, runs it from that directory,
    and captures stdout into 'merged_latexpand.tex' there.  Errors from perl
    are discarded (stderr -> DEVNULL) and the exit status is not checked.

    NOTE(review): the working directory is not restored if copyfile/chdir or
    the run itself raises — no try/finally here, unlike de_macro().
    """
    # Save the current working directory
    oldpwd = os.getcwd()
    target_dir = os.path.dirname(file) + '/'
    # Correctly construct the path
    target = os.path.join(target_dir, 'latexpand')
    src = './src/utils/latexpand'
    # Copy the `latexpand` script to the target directory
    shutil.copyfile(src, target)
    # Change to the target directory
    os.chdir(target_dir)

    # Run the perl command without shell=True and handle redirection within Python
    with open('merged_latexpand.tex', 'w') as output_file:
        subprocess.run(['perl', 'latexpand', os.path.basename(file)],
                       stdout=output_file, stderr=subprocess.DEVNULL)

    # Return to the original directory
    os.chdir(oldpwd)
328
-
329
-
330
def de_macro(file):
    """Expand private macros in `file` with the bundled de-macro.py script.

    Copies ./src/utils/de-macro.py next to `file`, runs it from that directory
    (de-macro writes '<name>-clean.tex'), and raises Exception when the script
    exits non-zero.  The working directory is always restored.
    """
    # Save the current working directory
    oldpwd = os.getcwd()
    target_dir = os.path.dirname(file) + '/'
    # Construct the target path
    target = os.path.join(target_dir, 'de-macro.py')
    # BUG FIX: was '.src/utils/de-macro.py' (missing separator after '.'),
    # so the copy could never find the script; sibling perl_expand() uses
    # './src/utils/...', so use the same root here.
    src = './src/utils/de-macro.py'

    # Copy the `de-macro.py` script to the target directory
    shutil.copyfile(src, target)
    # Change to the target directory
    os.chdir(target_dir)

    # Run the de-macro script and surface failures as a single Exception type.
    try:
        subprocess.run(['python3', 'de-macro.py', os.path.basename(file)],
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
    except subprocess.CalledProcessError as e:
        raise Exception(f"Error de-macro: {e}") from e
    finally:
        # Always return to the original directory
        os.chdir(oldpwd)
352
-
353
-
354
def def_handle(file):
    """Rewrite \\def-style macros in `file` in place via the def_handle.py helper.

    Raises Exception when the helper exits with a non-zero status.
    """
    status = os.system(f'python3 src/utils/def_handle.py {file} --output {file}')
    if status != 0:
        raise Exception('Error def handle')
358
-
359
-
360
def declare_operator(file):
    """Normalize macro/title/citation/sectioning commands in `file`, in place.

    Rewrites \\DeclareMathOperator and starred (re)newcommand variants so a
    later de-macro pass can expand them, canonicalizes the many title and
    citation command variants, flattens all sectioning levels to \\section,
    strips the preamble via get_core, and writes the result back to `file`.

    NOTE(review): substitution order within each dict matters (starred forms
    must be rewritten before their unstarred prefixes); dicts preserve
    insertion order, so do not reorder the entries.
    """
    s = read_tex_file(file)
    ## Operators
    pattern = r'\\DeclareMathOperator'
    s = re.sub(pattern, r'\\newcommand', s)
    # Map starred/provide variants onto the plain forms de-macro understands.
    pattern = {
        r'\\newcommand\*': r'\\newcommand',
        r'\\providecommand\*': r'\\newcommand',
        r'\\providecommand': r'\\newcommand',
        r'\\renewcommand\*': r'\\renewcommand',
        r'\\newenvironment\*': r'\\newenvironment',
        r'\\renewenvironment\*': r'\\renewenvironment'
    }
    # Join '\end {x}' -> '\end{x}' before the table-driven rewrites.
    s = re.sub(r'\\end +', r'\\end', s)
    for key in pattern:
        s = re.sub(key, pattern[key], s)
    ## Title
    start = '\\begin{document}'
    beginning_doc = s.find(start)
    # Venue-specific title commands collapse onto plain \title.
    pattern = {
        r'\\icmltitlerunning\*': r'\\title',
        r'\\icmltitlerunning': r'\\title',
        r'\\inlinetitle\*': r'\\title',
        r'\\icmltitle\*': r'\\title',
        r'\\inlinetitle': r'\\title',
        r'\\icmltitle': r'\\title',
        r'\\titlerunning\*': r'\\title',
        r'\\titlerunning': r'\\title',
        r'\\toctitle': r'\\title',
        r'\\title\*': r'\\title',
        r'\\TITLE\*': r'\\title',
        r'\\TITLE': r'\\title',
        r'\\Title\*': r'\\title',
        r'\\Title': r'\\title',
    }
    for key in pattern:
        s = re.sub(key, pattern[key], s)
    find_potential = s.find('\\title')

    ## Remove \\
    # Clean the title text (drop line breaks / ties) and, when the title was
    # defined in the preamble, substitute it for \maketitle in the body.
    title_content = retrieve_text(s, 'title', keep_text = True)
    if title_content != None:
        cleaned_title = re.sub(r'\\\\', ' ', title_content)
        cleaned_title = re.sub(r'\n',' ', cleaned_title)
        cleaned_title = re.sub(r'\~',' ', cleaned_title)
        s = s.replace(title_content, cleaned_title)
        if find_potential != -1 and find_potential < beginning_doc:
            s = s.replace('\\maketitle', cleaned_title)

    ## Cite and ref commands
    # Natbib/cleveref variants onto plain \cite/\ref; theorem-like environment
    # aliases onto their canonical names.
    pattern = {
        r'\\citep\*': r'\\cite',
        r'\\citet\*': r'\\cite',
        r'\\citep': r'\\cite',
        r'\\citet': r'\\cite',
        r'\\cite\*': r'\\cite',
        r'\\citealt\*': r'\\cite',
        r'\\citealt': r'\\cite',
        r'\\citealtp\*': r'\\cite',
        r'\\citealp': r'\\cite',
        r'\\citeyear\*': r'\\cite',
        r'\\citeyear': r'\\cite',
        r'\\citeauthor\*': r'\\cite',
        r'\\citeauthor': r'\\cite',
        r'\\citenum\*': r'\\cite',
        r'\\citenum': r'\\cite',
        r'\\cref': r'\\ref',
        r'\\Cref': r'\\ref',
        r'\\factref': r'\\ref',
        r'\\appref': r'\\ref',
        r'\\thmref': r'\\ref',
        r'\\secref': r'\\ref',
        r'\\lemref': r'\\ref',
        r'\\corref': r'\\ref',
        r'\\eqref': r'\\ref',
        r'\\autoref': r'\\ref',
        r'begin{thm}': r'begin{theorem}',
        r'begin{lem}': r'begin{lemma}',
        r'begin{cor}': r'begin{corollary}',
        r'begin{exm}': r'begin{example}',
        r'begin{defi}': r'begin{definition}',
        r'begin{rem}': r'begin{remark}',
        r'begin{prop}': r'begin{proposition}',
        r'end{thm}': r'end{theorem}',
        r'end{lem}': r'end{lemma}',
        r'end{cor}': r'end{corollary}',
        r'end{exm}': r'end{example}',
        r'end{defi}': r'end{definition}',
        r'end{rem}': r'end{remark}',
        r'end{prop}': r'end{proposition}',
    }

    for key in pattern:
        s = re.sub(key, pattern[key], s)


    # Flatten every sectioning level (chapters, subsections, custom variants)
    # down to \section.
    pattern = {
        r'subsubsection': r'section',
        r'subsubsection ': r'section',
        r'subsubsection\*': r'section',
        r'subsubsection\* ': r'section',
        r'subsection': r'section',
        r'subsection ': r'section',
        r'subsection\*': r'section',
        r'subsection\* ': r'section',
        r'section ': r'section',
        r'section\*': r'section',
        r'section\* ': r'section',
        r'chapter': r'section',
        r'chapter ': r'section',
        r'chapter\*': r'section',
        r'chapter\* ': r'section',
        r'mysubsubsection': r'section',
        r'mysubsection': r'section',
        r'mysection': r'section',
    }

    for key in pattern:
        s = re.sub(key, pattern[key], s)

    # In case any new commands for appendix/appendices
    s = re.sub(r'newcommand{\\appendix}', '', s)
    s = re.sub(r'newcommand{\\appendices}', '', s)
    s = get_core(s)

    ## In case of double titles being defined
    title_content = retrieve_text(s, 'title', keep_text = True)
    if title_content != None:
        cleaned_title = re.sub(r'\\\\', ' ', title_content)
        cleaned_title = re.sub(r'\n',' ', cleaned_title)
        cleaned_title = re.sub(r'\~',' ', cleaned_title)
        s = s.replace(title_content, cleaned_title)
    write_tex_file(file, s)
493
-
494
-
495
def replace_imports(file, s):
    """Rewrite \\import/\\subfile/\\include* in ``s`` as plain \\input{...}.

    The normalized text is written back to ``file`` and also returned.
    """
    # \import{dir}{name} -> \input{dirname} (the two arguments concatenate).
    s = re.sub(r'\\import{(.*?)}{(.*?)}', r"\\input{\1\2}", s)
    s = re.sub(r'\\subfile{(.*?)}', r"\\input{\1}", s)
    s = re.sub(r'\\include[*]{(.*?)}', r"\\input{\1}", s)
    write_tex_file(file, s)
    return s
504
-
505
-
506
def remove_multargument(s, target, k):
    """Delete every occurrence of ``target`` together with its k brace-balanced arguments.

    Scans forward from each occurrence, counting matched '{'/'}' pairs; one
    argument is complete whenever opens == closes, and scanning stops after k
    arguments.  The command and everything scanned is cut from ``s``.
    """
    hit = s.find(target)
    while hit != -1:
        scan_from = hit + len(target)
        opens = 0
        closes = 0
        args_seen = 0
        for offset, ch in enumerate(s[scan_from:]):
            if ch == '{':
                opens += 1
            if ch == '}':
                closes += 1
            # A balanced group just closed: that's one full argument consumed.
            if opens != 0 and closes != 0 and opens == closes:
                args_seen += 1
                opens = 0
                closes = 0
                if args_seen == k:
                    break
        s = s[:hit] + s[scan_from + offset + 1:]
        hit = s.find(target)
    return s
528
-
529
-
530
def fix_citations(s):
    """Collapse the natbib citation-command zoo onto plain \\cite.

    Starred forms are listed before their unstarred prefixes so the '*' is
    consumed in the same substitution — keep the order.
    """
    replacements = (
        (r'\\citep\*', r'\\cite'),
        (r'\\citet\*', r'\\cite'),
        (r'\\citep', r'\\cite'),
        (r'\\citet', r'\\cite'),
        (r'\\cite\*', r'\\cite'),
        (r'\\citealt\*', r'\\cite'),
        (r'\\citealt', r'\\cite'),
        (r'\\citealtp\*', r'\\cite'),
        (r'\\citealp', r'\\cite'),
        (r'\\citeyear\*', r'\\cite'),
        (r'\\citeyear', r'\\cite'),
        (r'\\citeauthor\*', r'\\cite'),
        (r'\\citeauthor', r'\\cite'),
        (r'\\citenum\*', r'\\cite'),
        (r'\\citenum', r'\\cite'),
    )
    for old, new in replacements:
        s = re.sub(old, new, s)
    return s
551
-
552
def find_bib(directory):
    """Recursively collect the paths of all .bib files under ``directory``."""
    all_files = []
    for root, _, files in os.walk(directory):
        for name in files:
            all_files.append(os.path.join(root, name))
    return [p for p in all_files if p.endswith('.bib')]
560
-
561
def create_bib_from_bbl(bibfile):
    """Parse a tex2bib-generated .bib file into {citation_key: fields} dicts.

    tex2bib stores each formatted .bbl entry in a 'note' field; this function
    additionally tries to recover a normalized lowercase title from that note
    (titles are conventionally quoted with ``...'' or delimited by the two
    \\newblock separators).

    NOTE(review): entries without a 'note' field raise KeyError here — TODO
    confirm tex2bib always emits one.
    """
    with open(bibfile, 'r') as f:
        content = f.read()
    library_raw = bibtexparser.parse_string(content)
    library = {}
    for block in library_raw.blocks:
        # Skip parse failures, duplicates and comments — only real entries count.
        if isinstance(
            block,
            (bibtexparser.model.DuplicateBlockKeyBlock, bibtexparser.model.ParsingFailedBlock, bibtexparser.model.ImplicitComment)
        ):
            continue
        fields = {}
        for field in block.fields:
            fields[field.key] = field.value

        ## Get a good title one ##
        field_content = fields["note"]
        field_content = field_content.replace("\n", " ")
        field_content = re.sub(" +", " ", field_content)
        # Case 1: the title is already quoted ``like this'' in the note.
        if field_content.find("``") != -1 and field_content.find("\'\'") != -1:
            # Strip emphasis commands, braces, quotes and punctuation, lowercase.
            title = (
                field_content[field_content.find("``") + 2 : field_content.find("\'\'")]
                .replace("\\emph", "")
                .replace("\\emp", "")
                .replace("\\em", "")
                .replace(",", "")
                .replace("{", "")
                .replace("}","")
                .replace("``", "")
                .replace("\'\'", "")
                .strip(".")
                .strip()
                .strip(".")
                .lower()
            )
            fields['title'] = title
        else:
            # Case 2: exactly two \newblock separators — the title sits between
            # them, so synthesize the quotes and re-run the same extraction.
            if field_content.count("\\newblock") == 2:
                field_content = field_content.replace("\\newblock", "``", 1)
                field_content = field_content.replace("\\newblock", "\'\'", 1)
                if field_content.find("``") != -1 and field_content.find("\'\'") != -1:
                    title = (
                        field_content[field_content.find("``") + 2 : field_content.find("\'\'")]
                        .replace("\\emph", "")
                        .replace("\\emp", "")
                        .replace("\\em", "")
                        .replace(",", "")
                        .replace("{", "")
                        .replace("}","")
                        .replace("``", "")
                        .replace("\'\'", "")
                        .strip(".")
                        .strip()
                        .strip(".")
                        .lower()
                    )
                    fields['title'] = title
        library[block.key] = fields
    return library
620
-
621
-
622
def create_bib(bibfile):
    """Parse a .bib file into a {citation_key: fields} dict.

    Field values are stripped of braces; titles are additionally normalized
    (whitespace collapsed, emphasis commands/commas/quotes removed, trimmed,
    lowercased) to make cross-source title matching robust.  Entries without
    a title are skipped entirely.
    """
    with open(bibfile, 'r') as f:
        content = f.read()
    library_raw = bibtexparser.parse_string(content)

    library = {}
    for block in library_raw.blocks:
        # Skip parse failures, duplicates and comments — only real entries count.
        if isinstance(
            block,
            (bibtexparser.model.DuplicateBlockKeyBlock, bibtexparser.model.ParsingFailedBlock, bibtexparser.model.ImplicitComment)
        ):
            continue
        fields = {}
        for field in block.fields:
            fields[field.key] = field.value.replace('{', '').replace('}', '')
            if field.key == 'title':
                title = re.sub(r'[\n]+', ' ', field.value) # keep only one \n
                title = re.sub(r' +', ' ', title)
                fields[field.key] = (
                    title.replace("\\emph", "")
                    .replace("\\emp", "")
                    .replace("\\em", "")
                    .replace(",", "")
                    .replace("{", "")
                    .replace("}", "")
                    .strip(".")
                    .strip()
                    .strip(".")
                    .lower()
                )
        # Title-less entries are useless for matching downstream; drop them.
        if 'title' not in fields:
            continue
        library[block.key] = fields
    return library
656
-
657
-
658
def find_bbl(directory):
    """Recursively collect the paths of all .bbl files under ``directory``."""
    all_files = []
    for root, _, files in os.walk(directory):
        for name in files:
            all_files.append(os.path.join(root, name))
    return [p for p in all_files if p.endswith('.bbl')]
666
-
667
-
668
def textobib(file):
    """Convert a .bbl file to .bib with the tex2bib Perl helper.

    Copies ./tex2bib next to ``file``, runs it there, restores the working
    directory, and returns the path of the generated .bib file.
    """
    previous_dir = os.getcwd()
    work_dir = os.path.dirname(file) + '/'
    helper_copy = work_dir + 'tex2bib'
    shutil.copyfile('./tex2bib', helper_copy)
    os.chdir(work_dir)
    bib_name = os.path.splitext(os.path.basename(file))[0] + '.bib'
    os.system(f'perl tex2bib -i {os.path.basename(file)} -o {bib_name}')
    os.chdir(previous_dir)
    return work_dir + bib_name
679
-
680
-
681
def get_library_bib(bib_files):
    """Parse every .bib file and merge the entries into one dict keyed by citation key.

    Later files win on duplicate keys (dict.update semantics, in input order).
    """
    merged = {}
    for bib_path in bib_files:
        merged.update(create_bib(bib_path))
    return merged
689
-
690
-
691
def get_library_bbl(bbl_files):
    """Convert every .bbl to .bib, parse them, and merge into one dict.

    Conversion of all files happens before any parsing (matching the original
    two-phase behavior); later files win on duplicate keys.
    """
    converted = [textobib(bbl_path) for bbl_path in bbl_files]
    merged = {}
    for bib_path in converted:
        merged.update(create_bib_from_bbl(bib_path))
    return merged