File size: 9,659 Bytes
a47277e
 
a1a4838
a47277e
 
 
 
 
 
 
 
 
 
 
08b986d
a47277e
08b986d
a47277e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76f33f7
d41fdee
76f33f7
22868c0
a47277e
76f33f7
 
a47277e
 
 
 
 
 
 
9e2d5ab
a47277e
 
 
 
 
 
9e2d5ab
a47277e
 
 
08b986d
 
a47277e
08b986d
 
a47277e
 
 
 
 
 
9e2d5ab
a47277e
 
 
 
1ad6abd
a47277e
 
 
76f33f7
 
 
 
 
a47277e
 
 
 
 
 
 
 
 
 
 
 
76f33f7
 
 
 
a47277e
 
 
 
 
 
 
 
 
 
 
 
76f33f7
a47277e
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
import bibtexparser
import json
import gradio as gr

class ConsistencyHandler:
    """Check BibTeX entries for missing required fields and inconsistent optional fields.

    For every supported entry type the handler keeps three pieces of state:

    * ``<type>ImportantFields`` - fields every entry of that type is expected to have;
    * ``<type>ExtraFields`` - the baseline of optional fields, learned from the
      first entry of that type that is checked;
    * a counter of how many entries of that type have been checked so far.

    Each ``check*`` method returns ``(importantErrorFields, consistencyErrorFields)``:
    the important fields missing from the entry, and the optional fields on which
    the entry deviates from the baseline (present here but not in the baseline,
    or present in the baseline but missing here).
    """

    # Keys dropped from an entry before any comparison, per entry type.
    _articleIgnoredFields = frozenset({'ID', 'number', 'ENTRYTYPE', 'date'})
    _inbookIgnoredFields = frozenset({'ID', 'number', 'ENTRYTYPE', 'date', 'doi'})
    _techreportIgnoredFields = frozenset({'ID', 'ENTRYTYPE', 'date'})
    _inproceedingsIgnoredFields = frozenset({'ID', 'ENTRYTYPE', 'date', 'series'})

    def __init__(self):
        # Optional-field baselines; each is filled by the first checked entry of its type.
        self.articleExtraFields = set()
        self.inbookExtraFields = set()
        self.techreportExtraFields = set()
        self.inproceedingsExtraFields = set()

        self.articleImportantFields = {'year', 'author', 'title', 'journal', 'volume', 'pages'}
        self.inbookImportantFields = {'author', 'year', 'booktitle'}  # to check for publisher, title
        self.techreportImportantFields = {'title', 'author', 'institution', 'year', 'number'}
        self.inproceedingsImportantFields = {'author', 'title', 'booktitle', 'year', 'pages'}  # to check for publisher

        # Number of entries checked so far, per type.
        self.articleCount = 0
        self.inbookCount = 0
        self.techreportCount = 0
        self.inproceedingscount = 0

    @staticmethod
    def _compareFields(entry, ignoredFields, importantFields, baselineFields, isFirstEntry):
        """Compare one entry's keys against the important set and the optional baseline.

        Args:
            entry: Mapping of field name to value for a single BibTeX entry.
            ignoredFields: Keys to drop before comparing.
            importantFields: Fields the entry is expected to contain.
            baselineFields: Mutable set of optional fields for this entry type.
                It is updated in place when ``isFirstEntry`` is true.
            isFirstEntry: True when no entry of this type has been checked yet.

        Returns:
            ``(importantErrorFields, consistencyErrorFields)`` as two new sets.
        """
        presentFields = set(entry) - ignoredFields
        importantErrorFields = set(importantFields) - presentFields
        optionalFields = presentFields - importantFields

        if isFirstEntry:
            # The first entry of a type defines which optional fields are expected.
            baselineFields.update(optionalFields)

        # Symmetric difference: unexpected optional fields plus baseline fields
        # that this entry lacks. The original code tried to add to the result
        # with set.union(), which returns a new set and left the result untouched.
        consistencyErrorFields = optionalFields ^ baselineFields
        return importantErrorFields, consistencyErrorFields

    def checkArticles(self, entry: dict):
        """Check an ``article`` entry; see the class docstring for the return value."""
        result = self._compareFields(
            entry,
            self._articleIgnoredFields,
            self.articleImportantFields,
            self.articleExtraFields,
            self.articleCount == 0,
        )
        self.articleCount += 1
        return result

    def checkInbook(self, entry: dict):
        """Check an ``inbook`` entry; see the class docstring for the return value."""
        result = self._compareFields(
            entry,
            self._inbookIgnoredFields,
            self.inbookImportantFields,
            self.inbookExtraFields,
            self.inbookCount == 0,
        )
        self.inbookCount += 1
        return result

    def checkTechreport(self, entry: dict):
        """Check a ``techreport`` entry; see the class docstring for the return value."""
        result = self._compareFields(
            entry,
            self._techreportIgnoredFields,
            self.techreportImportantFields,
            self.techreportExtraFields,
            self.techreportCount == 0,
        )
        self.techreportCount += 1
        return result

    def checkInproceedings(self, entry: dict):
        """Check an ``inproceedings`` entry; see the class docstring for the return value."""
        result = self._compareFields(
            entry,
            self._inproceedingsIgnoredFields,
            self.inproceedingsImportantFields,
            self.inproceedingsExtraFields,
            self.inproceedingscount == 0,
        )
        self.inproceedingscount += 1
        return result

    @staticmethod
    def checkMisc():
        """Placeholder for ``misc`` entries; performs no checks and returns ``None``.

        Declared static so it can be called on the class or on an instance
        (the original signature had no ``self`` and failed on instances).
        """
        return None



def bibtex_to_dict_list(bibtex_string):
    """Parse a BibTeX string and return its entries as a list of plain dicts.

    Args:
        bibtex_string: BibTeX source text handed to ``bibtexparser.loads``.

    Returns:
        A new list with one ``dict`` copy per entry of the parsed database,
        in the order the parser exposes them.
    """
    parsed = bibtexparser.loads(bibtex_string)

    records = []
    for record in parsed.entries:
        # Copy so callers can add keys without touching the parser's objects.
        records.append(dict(record))
    return records


def referenceErrorParser(bibtex_string: str, coordinateStr: str):
    """Annotate every BibTeX entry with validation results and its position.

    Args:
        bibtex_string: BibTeX source containing the references to check.
        coordinateStr: JSON text with a ``refBibs`` list; the i-th item's
            ``pos`` value is copied onto the i-th parsed entry.

    Returns:
        A JSON string of the parsed entries. ``article``, ``inbook`` and
        ``inproceedings`` entries gain ``asterikError`` and ``consistencyError``
        lists, ``misc`` entries gain a ``warningMessage``, and every entry
        gains ``pos``. Other entry types are only reported on stdout.
    """
    print("Bibtex: \n \n"+bibtex_string, '\n', "Coordinates: \n \n"+coordinateStr)
    coordinates = json.loads(coordinateStr)
    handler = ConsistencyHandler()
    records = bibtex_to_dict_list(bibtex_string)

    # Entry types that have a dedicated field check.
    # techreport is intentionally not wired in here.
    validators = {
        "article": handler.checkArticles,
        "inbook": handler.checkInbook,
        "inproceedings": handler.checkInproceedings,
    }

    for record in records:
        kind = record['ENTRYTYPE']
        validate = validators.get(kind)

        if validate is not None:
            missing, inconsistent = validate(record)
            record['asterikError'] = list(missing)
            record['consistencyError'] = list(inconsistent)
        elif kind == "misc":
            # No structural check for misc entries; attach a general reminder instead.
            record['warningMessage'] = "For miscellaneous fields, certain key details must be carefully included when citing, such as the year, volume, author, publisher, company or organization, and the link (especially when citing a website), among others."
        else:
            print(f"{kind} has to be added for processing.")

    # Attach coordinates by index: entry i takes the position of refBibs[i].
    for index, record in enumerate(records):
        record['pos'] = coordinates['refBibs'][index]['pos']

    return json.dumps(records)
    

def create_interface():
    """Build the Gradio Blocks UI for the reference validator.

    Layout, top to bottom: a heading, two unlabeled text areas, a
    "Get Corrections" button and a labeled results text area. Clicking the
    button calls ``referenceErrorParser`` with the contents of the two text
    areas (in order) and writes its return value into the results area.

    Returns:
        The constructed ``gr.Blocks`` object; it is not launched here.
    """
    with gr.Blocks(title="BibTex Analyser") as app:
        gr.Markdown("# Reference Validation")

        # The first box feeds the BibTeX argument, the second the coordinates argument.
        with gr.Row():
            bibtex_box = gr.TextArea()
        with gr.Row():
            coordinates_box = gr.TextArea()

        with gr.Row():
            run_button = gr.Button("Get Corrections")

        with gr.Row():
            report_box = gr.TextArea(label="Analysis Results", show_label=True)

        run_button.click(
            fn=referenceErrorParser,
            inputs=[bibtex_box, coordinates_box],
            outputs=report_box,
        )

    return app

if __name__ == "__main__":
    # Script entry point: build the UI, then serve it without a public share link.
    interface = create_interface()
    interface.launch(share=False)  # Set to False in production