# Spaces: Sleeping
| import bibtexparser | |
| import json | |
| import gradio as gr | |
class ConsistencyHandler:
    """Check BibTeX entries for missing required fields and field-set drift.

    For each supported entry type the handler keeps:

    * ``<type>ImportantFields`` - fields every entry of that type must carry;
    * ``<type>ExtraFields`` - the optional fields found on the first entry of
      that type, used as the baseline for every later entry of that type;
    * a counter of how many entries of that type have been checked.

    Every ``check*`` method returns a 2-tuple of sets of field names:
    ``(importantErrorFields, consistencyErrorFields)``.
    """

    def __init__(self):
        # Baseline optional fields per entry type (filled by the first entry).
        self.articleExtraFields = set()
        self.inbookExtraFields = set()
        self.techreportExtraFields = set()
        self.inproceedingsExtraFields = set()

        # Required fields per entry type.
        self.articleImportantFields = {'year', 'author', 'title', 'journal', 'volume', 'pages'}
        self.inbookImportantFields = {'author', 'year', 'booktitle'}  # to check for publisher, title
        self.techreportImportantFields = {'title', 'author', 'institution', 'year', 'number'}
        self.inproceedingsImportantFields = {'author', 'title', 'booktitle', 'year', 'pages'}  # to check for publisher

        # Number of entries checked so far per entry type.
        self.articleCount = 0
        self.inbookCount = 0
        self.techreportCount = 0
        # Lower-case "count" is kept on purpose: it is the existing public name.
        self.inproceedingscount = 0
        # "misc" entries have no tracked state yet (see checkMisc).

    @staticmethod
    def _checkEntry(entry, ignoredFields, importantFields, baselineFields, isFirstEntry):
        """Shared implementation behind the public ``check*`` methods.

        Args:
            entry: mapping of field name to value for one BibTeX entry.
            ignoredFields: keys that take no part in either check.
            importantFields: fields the entry is required to have.
            baselineFields: optional-field baseline for this entry type;
                updated in place when ``isFirstEntry`` is true.
            isFirstEntry: whether this is the first entry of its type.

        Returns:
            ``(importantErrorFields, consistencyErrorFields)`` where the first
            set holds required fields missing from ``entry`` and the second
            holds optional fields that differ from the baseline in either
            direction (present here but not in the baseline, or in the
            baseline but missing here).
        """
        presentFields = set(entry) - ignoredFields
        importantErrorFields = importantFields - presentFields
        optionalFields = presentFields - importantFields

        if isFirstEntry:
            # The first entry of a type defines what "consistent" means.
            baselineFields.update(optionalFields)

        # Symmetric difference: the previous code tried to add the missing
        # baseline fields with set.union(), whose result was discarded, so
        # entries lacking a baseline field were never reported.
        consistencyErrorFields = optionalFields ^ baselineFields
        return importantErrorFields, consistencyErrorFields

    def checkArticles(self, entry: dict):
        """Check one ``article`` entry; see the class docstring for the result."""
        result = self._checkEntry(
            entry,
            {'ID', 'number', 'ENTRYTYPE', 'date'},
            self.articleImportantFields,
            self.articleExtraFields,
            self.articleCount == 0,
        )
        self.articleCount += 1
        return result

    def checkInbook(self, entry: dict):
        """Check one ``inbook`` entry; see the class docstring for the result."""
        result = self._checkEntry(
            entry,
            {'ID', 'number', 'ENTRYTYPE', 'date', 'doi'},
            self.inbookImportantFields,
            self.inbookExtraFields,
            self.inbookCount == 0,
        )
        self.inbookCount += 1
        return result

    def checkTechreport(self, entry: dict):
        """Check one ``techreport`` entry; see the class docstring for the result."""
        result = self._checkEntry(
            entry,
            {'ID', 'ENTRYTYPE', 'date'},
            self.techreportImportantFields,
            self.techreportExtraFields,
            self.techreportCount == 0,
        )
        self.techreportCount += 1
        return result

    def checkInproceedings(self, entry: dict):
        """Check one ``inproceedings`` entry; see the class docstring for the result."""
        result = self._checkEntry(
            entry,
            {'ID', 'ENTRYTYPE', 'date', 'series'},
            self.inproceedingsImportantFields,
            self.inproceedingsExtraFields,
            self.inproceedingscount == 0,
        )
        self.inproceedingscount += 1
        return result

    @staticmethod
    def checkMisc():
        """Placeholder for ``misc`` entries; performs no check and returns None."""
        pass
def bibtex_to_dict_list(bibtex_string):
    """Parse BibTeX text and return its entries as a list of plain dicts.

    Each element is a fresh ``dict`` copy of one item of the ``entries``
    attribute of the database returned by ``bibtexparser.loads``.
    """
    parsed = bibtexparser.loads(bibtex_string)
    records = []
    for record in parsed.entries:
        # Copy so callers can add keys without touching the parsed database.
        records.append(dict(record))
    return records
def referenceErrorParser(bibtex_string: str, coordinateStr: str):
    """Validate each BibTeX entry, attach its position and return JSON text.

    Args:
        bibtex_string: BibTeX source text to parse.
        coordinateStr: JSON text; ``['refBibs'][i]['pos']`` is read for the
            i-th parsed entry.

    Returns:
        A JSON string encoding the list of entry dicts. ``article``,
        ``inbook`` and ``inproceedings`` entries gain ``asterikError`` and
        ``consistencyError`` lists, ``misc`` entries gain ``warningMessage``,
        and every entry gains ``pos``. Any other entry type is only reported
        with ``print``.
    """
    print("Bibtex: \n \n"+bibtex_string, '\n', "Coordinates: \n \n"+coordinateStr)
    coordinates = json.loads(coordinateStr)
    checker = ConsistencyHandler()
    entries = bibtex_to_dict_list(bibtex_string)

    # Entry types that have a dedicated validator on the checker.
    validators = {
        "article": checker.checkArticles,
        "inbook": checker.checkInbook,
        "inproceedings": checker.checkInproceedings,
    }
    miscWarning = "For miscellaneous fields, certain key details must be carefully included when citing, such as the year, volume, author, publisher, company or organization, and the link (especially when citing a website), among others."

    # Pass 1: record validation findings on each entry.
    for entry in entries:
        entryType = entry['ENTRYTYPE']
        validate = validators.get(entryType)
        if validate is not None:
            missingFields, inconsistentFields = validate(entry)
            entry['asterikError'] = list(missingFields)
            entry['consistencyError'] = list(inconsistentFields)
        elif entryType == "misc":
            # No field check for misc entries; attach a general warning.
            entry['warningMessage'] = miscWarning
        else:
            print(f"{entryType} has to be added for processing.")

    # Pass 2: pair the i-th entry with the i-th supplied position.
    for index, entry in enumerate(entries):
        entry['pos'] = coordinates['refBibs'][index]['pos']

    return json.dumps(entries)
def create_interface():
    """Build and return the Gradio Blocks UI for the reference checker.

    The page has two text areas (passed, in order, as the two arguments of
    ``referenceErrorParser``), a "Get Corrections" button, and a text area
    that receives the function's return value.
    """
    # NOTE(review): nesting was reconstructed from a listing without
    # indentation; the rows are assumed to be siblings - confirm.
    with gr.Blocks(title="BibTex Analyser") as interface:
        gr.Markdown("# Reference Validation")

        with gr.Row():
            bibtexBox = gr.TextArea()

        with gr.Row():
            coordinateBox = gr.TextArea()

        with gr.Row():
            submitButton = gr.Button("Get Corrections")

        with gr.Row():
            resultBox = gr.TextArea(label="Analysis Results", show_label=True)

        # Clicking the button runs the parser on both inputs.
        submitButton.click(
            fn=referenceErrorParser,
            inputs=[bibtexBox, coordinateBox],
            outputs=resultBox,
        )

    return interface
if __name__ == "__main__":
    # Build the UI and start serving it when run as a script.
    interface = create_interface()
    # share stays False in production.
    interface.launch(share=False)