import json

import bibtexparser
import gradio as gr

# Warning attached to every `misc` entry; these entries get no field checks.
_MISC_WARNING_MESSAGE = (
    "For miscellaneous fields, certain key details must be carefully included "
    "when citing, such as the year, volume, author, publisher, company or "
    "organization, and the link (especially when citing a website), among others."
)


class ConsistencyHandler:
    """Check BibTeX entries for missing required fields and field consistency.

    For each supported entry type the handler keeps:

    * ``<type>ImportantFields`` -- fields every entry of that type must have;
    * ``<type>ExtraFields`` -- the optional fields carried by the first entry
      of that type that was checked (the baseline for later entries);
    * a counter of how many entries of that type have been checked.

    An entry is reported as inconsistent when its optional fields differ from
    the baseline, i.e. it has an optional field the first entry lacked, or it
    lacks an optional field the first entry had.
    """

    def __init__(self):
        # Optional fields seen on the first entry of each type.
        self.articleExtraFields = set()
        self.inbookExtraFields = set()
        self.techreportExtraFields = set()
        self.inproceedingsExtraFields = set()

        # Required fields per entry type.
        self.articleImportantFields = {
            'year', 'author', 'title', 'journal', 'volume', 'pages'
        }
        # TODO: decide whether 'publisher' and 'title' should be required.
        self.inbookImportantFields = {'author', 'year', 'booktitle'}
        self.techreportImportantFields = {
            'title', 'author', 'institution', 'year', 'number'
        }
        # TODO: decide whether 'publisher' should be required.
        self.inproceedingsImportantFields = {
            'author', 'title', 'booktitle', 'year', 'pages'
        }

        # Number of entries checked so far, per entry type.
        self.articleCount = 0
        self.inbookCount = 0
        self.techreportCount = 0
        self.inproceedingscount = 0

    @staticmethod
    def _checkFields(entry: dict, ignored: set, required: set,
                     baseline: set, isFirst: bool) -> tuple:
        """Compute the field errors of one entry.

        Args:
            entry: The entry, mapping field names to values.
            ignored: Field names excluded from every check.
            required: Field names the entry must contain.
            baseline: Optional field names of the first entry of this type.
                Updated in place when ``isFirst`` is true.
            isFirst: Whether this is the first entry of its type.

        Returns:
            A ``(missingRequired, inconsistent)`` pair of sets of field names.
        """
        present = set(entry.keys()) - ignored
        missingRequired = required - present
        optional = present - required

        if isFirst:
            # The first entry of a type defines what later ones must match.
            baseline.update(optional)

        # Symmetric difference: unexpected optional fields plus baseline
        # fields that this entry is missing.
        inconsistent = optional ^ baseline
        return missingRequired, inconsistent

    def checkArticles(self, entry: dict) -> tuple:
        """Check an ``article`` entry.

        Returns:
            ``(missing required fields, inconsistent optional fields)``.
        """
        result = self._checkFields(
            entry,
            {'ID', 'number', 'ENTRYTYPE', 'date'},
            self.articleImportantFields,
            self.articleExtraFields,
            self.articleCount == 0,
        )
        self.articleCount += 1
        return result

    def checkInbook(self, entry: dict) -> tuple:
        """Check an ``inbook`` entry.

        Returns:
            ``(missing required fields, inconsistent optional fields)``.
        """
        result = self._checkFields(
            entry,
            {'ID', 'number', 'ENTRYTYPE', 'date', 'doi'},
            self.inbookImportantFields,
            self.inbookExtraFields,
            self.inbookCount == 0,
        )
        self.inbookCount += 1
        return result

    def checkTechreport(self, entry: dict) -> tuple:
        """Check a ``techreport`` entry.

        Unlike the other types, ``number`` is not ignored here because it is
        one of the required techreport fields.

        Returns:
            ``(missing required fields, inconsistent optional fields)``.
        """
        result = self._checkFields(
            entry,
            {'ID', 'ENTRYTYPE', 'date'},
            self.techreportImportantFields,
            self.techreportExtraFields,
            self.techreportCount == 0,
        )
        self.techreportCount += 1
        return result

    def checkInproceedings(self, entry: dict) -> tuple:
        """Check an ``inproceedings`` entry.

        Returns:
            ``(missing required fields, inconsistent optional fields)``.
        """
        result = self._checkFields(
            entry,
            {'ID', 'ENTRYTYPE', 'date', 'series'},
            self.inproceedingsImportantFields,
            self.inproceedingsExtraFields,
            self.inproceedingscount == 0,
        )
        self.inproceedingscount += 1
        return result

    @staticmethod
    def checkMisc():
        """Placeholder for ``misc`` entries; performs no checks."""
        pass


def bibtex_to_dict_list(bibtex_string):
    """Parse a BibTeX string and return its entries as a list of dicts."""
    bib_database = bibtexparser.loads(bibtex_string)
    return [dict(entry) for entry in bib_database.entries]


def referenceErrorParser(bibtex_string: str, coordinateStr: str) -> str:
    """Annotate BibTeX entries with field errors and positions.

    Args:
        bibtex_string: BibTeX source containing the references.
        coordinateStr: JSON text holding a ``refBibs`` list; the ``pos`` value
            of its i-th item is copied onto the i-th parsed entry.

    Returns:
        A JSON string of the parsed entries. Checked entries gain
        ``asterikError`` (missing required fields) and ``consistencyError``
        (inconsistent optional fields) lists; ``misc`` entries gain a
        ``warningMessage``; every entry gains ``pos``.

    Raises:
        IndexError: If ``refBibs`` has fewer items than there are entries.
        KeyError: If ``refBibs``, a ``pos`` value or an entry's ``ENTRYTYPE``
            is missing.
    """
    print("Bibtex: \n \n"+bibtex_string, '\n', "Coordinates: \n \n"+coordinateStr)
    data = json.loads(coordinateStr)
    checker = ConsistencyHandler()
    entries = bibtex_to_dict_list(bibtex_string)

    # NOTE: `techreport` is intentionally absent, so such entries are reported
    # as unsupported even though ConsistencyHandler.checkTechreport exists.
    checks = {
        "article": checker.checkArticles,
        "inbook": checker.checkInbook,
        "inproceedings": checker.checkInproceedings,
    }

    for entry in entries:
        entryType = entry['ENTRYTYPE']
        check = checks.get(entryType)
        if check is not None:
            imp, cons = check(entry)
            # Sorted so the emitted JSON does not depend on set ordering.
            entry['asterikError'] = sorted(imp)
            entry['consistencyError'] = sorted(cons)
        elif entryType == "misc":
            entry['warningMessage'] = _MISC_WARNING_MESSAGE
        else:
            print(f"{entry['ENTRYTYPE']} has to be added for processing.")

    # Attach the coordinates, matched to the entries by index.
    for i, entry in enumerate(entries):
        entry['pos'] = data['refBibs'][i]['pos']

    outputJson = json.dumps(entries)
    return outputJson


def create_interface():
    """Build the Gradio UI: two text inputs, a button and a results area."""
    with gr.Blocks(title="BibTex Analyser") as interface:
        gr.Markdown("# Reference Validation")

        with gr.Row():
            text_input1 = gr.TextArea()
        with gr.Row():
            text_input2 = gr.TextArea()
        with gr.Row():
            analyze_btn = gr.Button("Get Corrections")
        with gr.Row():
            results_output = gr.TextArea(
                label="Analysis Results",
                show_label=True
            )

        # The first text area supplies the BibTeX, the second the coordinates.
        analyze_btn.click(
            fn=referenceErrorParser,
            inputs=[text_input1, text_input2],
            outputs=results_output
        )

    return interface


if __name__ == "__main__":
    interface = create_interface()
    interface.launch(
        share=False,  # Set to False in production
    )