# qtpi's picture
# Create app.py
# a47277e verified
# raw
# history blame
# 8.98 kB
import bibtexparser
import json
class ConsistencyHandler:
    """Check BibTeX entries for required fields and cross-entry consistency.

    For each supported entry type (article, inbook, techreport,
    inproceedings) the handler keeps three pieces of state:

    * ``<type>ImportantFields`` -- fields every entry of that type must have;
    * ``<type>ExtraFields`` -- the optional fields found on the *first* entry
      of that type, used as the baseline for all later entries;
    * a counter of how many entries of that type have been checked.

    The handler is stateful: the first entry of each type fixes the baseline,
    so one instance should be used per bibliography.
    """

    # Bookkeeping keys that are ignored for each entry type.
    _ARTICLE_METADATA = frozenset({'ID', 'number', 'ENTRYTYPE', 'date'})
    _INBOOK_METADATA = frozenset({'ID', 'number', 'ENTRYTYPE', 'date', 'doi'})
    _TECHREPORT_METADATA = frozenset({'ID', 'ENTRYTYPE', 'date'})
    _INPROCEEDINGS_METADATA = frozenset({'ID', 'ENTRYTYPE', 'date', 'series'})

    def __init__(self):
        # Optional-field baselines; filled in by the first entry of each type.
        self.articleExtraFields = set()
        self.inbookExtraFields = set()
        self.techreportExtraFields = set()
        self.inproceedingsExtraFields = set()

        # Required fields per entry type.
        self.articleImportantFields = {'year', 'author', 'title', 'journal', 'volume', 'pages'}
        self.inbookImportantFields = {'author', 'year', 'title', 'booktitle', 'publisher'}
        self.techreportImportantFields = {'title', 'author', 'institution', 'year', 'number'}
        self.inproceedingsImportantFields = {'author', 'title', 'booktitle', 'year', 'publisher', 'pages'}

        # Entries checked so far per type; 0 means "no baseline recorded yet".
        self.articleCount = 0
        self.inbookCount = 0
        self.techreportCount = 0
        # Lower-case "c" is kept as-is so existing readers of the attribute still work.
        self.inproceedingscount = 0

    def _check(self, entry, metadata, important, extra, count_attr):
        """Shared implementation behind the public ``check*`` methods.

        Args:
            entry: Mapping of field name to value for one BibTeX entry.
            metadata: Keys to ignore entirely for this entry type.
            important: Required fields for this entry type.
            extra: Baseline set of optional fields for this entry type; it is
                updated in place when the first entry of the type is seen.
            count_attr: Name of the per-type counter attribute on ``self``.

        Returns:
            A tuple ``(missing_required, inconsistent)`` of two sets.
            ``inconsistent`` holds optional fields present here but not in the
            baseline, plus baseline fields that this entry lacks.
        """
        fields = set(entry) - metadata
        missing_required = important - fields
        optional = fields - important

        seen = getattr(self, count_attr)
        if seen == 0:
            # First entry of this type defines what "consistent" means.
            extra.update(optional)
            unexpected = set()
        else:
            unexpected = optional - extra

        # Baseline fields this entry does not carry. The original code built
        # a set here with ``set.union`` and discarded the result, so missing
        # baseline fields were never reported.
        absent = extra - optional

        setattr(self, count_attr, seen + 1)
        return missing_required, unexpected | absent

    def checkArticles(self, entry: dict):
        """Check an ``article`` entry; see ``_check`` for the return value."""
        return self._check(
            entry,
            self._ARTICLE_METADATA,
            self.articleImportantFields,
            self.articleExtraFields,
            'articleCount',
        )

    def checkInbook(self, entry: dict):
        """Check an ``inbook`` entry; see ``_check`` for the return value."""
        return self._check(
            entry,
            self._INBOOK_METADATA,
            self.inbookImportantFields,
            self.inbookExtraFields,
            'inbookCount',
        )

    def checkTechreport(self, entry: dict):
        """Check a ``techreport`` entry; see ``_check`` for the return value."""
        return self._check(
            entry,
            self._TECHREPORT_METADATA,
            self.techreportImportantFields,
            self.techreportExtraFields,
            'techreportCount',
        )

    def checkInproceedings(self, entry: dict):
        """Check an ``inproceedings`` entry; see ``_check`` for the return value."""
        return self._check(
            entry,
            self._INPROCEEDINGS_METADATA,
            self.inproceedingsImportantFields,
            self.inproceedingsExtraFields,
            'inproceedingscount',
        )

    @staticmethod
    def checkMisc():
        """Placeholder for ``misc`` entries; not implemented, returns None."""
        # Declared static because the original signature takes no ``self``,
        # which made it impossible to call on an instance.
        pass
def bibtex_to_dict_list(bibtex_string):
    """Parse a BibTeX string and return its entries as a list of plain dicts.

    Each element of the parsed database's ``entries`` is copied into a new
    ``dict``, so callers can add keys without touching the parser's objects.
    """
    parsed = bibtexparser.loads(bibtex_string)
    records = []
    for record in parsed.entries:
        records.append(dict(record))
    return records
def referenceErrorParser(bibtex_string: str):
    """Validate every entry of a BibTeX string and return the result as JSON.

    Entries of type article, inbook, techreport and inproceedings gain two
    keys (spelling kept as-is because consumers read these keys):

    * ``asterikError`` -- required fields the entry is missing;
    * ``consistensyError`` -- optional fields that differ from the first
      entry of the same type.

    ``misc`` entries are passed through unchanged. Any other entry type is
    passed through unchanged and a notice is printed.

    Args:
        bibtex_string: Raw BibTeX source.

    Returns:
        A JSON string encoding the list of (possibly annotated) entries.
    """
    # Use a fresh handler per call. The original referenced a global
    # ``checker`` that is not defined in this file, and a shared handler
    # would carry the baseline of one bibliography over into the next.
    checker = ConsistencyHandler()
    checks = {
        "article": checker.checkArticles,
        "inbook": checker.checkInbook,
        "techreport": checker.checkTechreport,
        "inproceedings": checker.checkInproceedings,
    }

    entries = bibtex_to_dict_list(bibtex_string)
    for entry in entries:
        entry_type = entry['ENTRYTYPE']
        check = checks.get(entry_type)
        if check is not None:
            imp, cons = check(entry)
            # Sorted so the JSON output does not depend on set iteration order.
            entry['asterikError'] = sorted(imp)
            entry['consistensyError'] = sorted(cons)
        elif entry_type != "misc":
            print(f"{entry_type} has to be added for processing.")

    return json.dumps(entries)
def create_interface():
    """Assemble the Blocks UI for the BibTeX analyser and return it.

    Components are created in this order: a Markdown heading, an input text
    area, a "Get Corrections" button, and a text area labelled
    "Analysis Results". Clicking the button calls ``referenceErrorParser``
    with the input text and writes its return value to the results area.

    NOTE(review): this relies on a module-level name ``gr`` (presumably
    ``import gradio as gr``), which is not imported in the visible part of
    the file -- confirm it is imported.
    """
    with gr.Blocks(title="BibTex Analyser") as app:
        gr.Markdown("# Reference Validation")

        with gr.Row():
            bibtex_box = gr.TextArea()

        with gr.Row():
            run_button = gr.Button("Get Corrections")

        with gr.Row():
            report_box = gr.TextArea(label="Analysis Results", show_label=True)

        run_button.click(fn=referenceErrorParser, inputs=bibtex_box, outputs=report_box)

    return app
if __name__ == "__main__":
    # Script entry point: build the UI, then start serving it.
    interface = create_interface()
    # share is kept False; the original note says to set it to False in production.
    interface.launch(share=False)