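# NOTE: the next three lines are repository-page metadata captured with the file, not program code.
# qtpi's picture
# Update app.py
# 08b986d verified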
import bibtexparser
import json
import gradio as gr
class ConsistencyHandler:
    """Check BibTeX entries for missing important fields and inconsistent extras.

    For each supported entry type (article, inbook, techreport,
    inproceedings) the handler keeps:

    * a fixed set of "important" fields every entry of that type should have;
    * a baseline of "extra" (non-important, non-metadata) fields, learned from
      the first entry of that type that is checked;
    * a counter of how many entries of that type have been checked.

    Each ``check*`` method returns ``(importantErrorFields,
    consistencyErrorFields)``, both sets of field names.
    """

    def __init__(self):
        # Baseline of extra fields per entry type, filled by the first entry.
        self.articleExtraFields = set()
        self.inbookExtraFields = set()
        self.techreportExtraFields = set()
        self.inproceedingsExtraFields = set()

        # Fields every entry of the given type is expected to carry.
        self.articleImportantFields = {'year', 'author', 'title', 'journal', 'volume', 'pages'}
        self.inbookImportantFields = {'author', 'year', 'booktitle'}  # to check for publisher, title
        self.techreportImportantFields = {'title', 'author', 'institution', 'year', 'number'}
        self.inproceedingsImportantFields = {'author', 'title', 'booktitle', 'year', 'pages'}  # to check for publisher

        # Number of entries checked so far per type; 0 means "baseline not set".
        # NOTE: 'misc' entries are not tracked by this class.
        self.articleCount = 0
        self.inbookCount = 0
        self.techreportCount = 0
        self.inproceedingscount = 0

    @staticmethod
    def _checkEntry(entry, metadata, importantFields, extraFields, isFirstEntry):
        """Shared implementation behind every public ``check*`` method.

        Args:
            entry: mapping of field name to value for one BibTeX entry.
            metadata: field names ignored by both checks.
            importantFields: field names the entry is expected to contain.
            extraFields: baseline set of extra fields for this entry type;
                updated in place when ``isFirstEntry`` is true.
            isFirstEntry: whether this is the first entry of its type.

        Returns:
            ``(importantErrorFields, consistencyErrorFields)`` where the first
            set holds important fields absent from the entry and the second
            holds extra fields that differ from the baseline in either
            direction (present here but not in the baseline, or in the
            baseline but missing here).
        """
        presentFields = set(entry) - metadata
        importantErrorFields = importantFields - presentFields
        optionalFields = presentFields - importantFields

        if isFirstEntry:
            # The first entry of a type defines what "consistent" means.
            extraFields.update(optionalFields)

        # Symmetric difference covers both unexpected and missing extras.
        # The original code tried to add the second half via ``set.union``,
        # whose return value was discarded, so missing baseline fields were
        # never reported.
        consistencyErrorFields = optionalFields ^ extraFields
        return importantErrorFields, consistencyErrorFields

    def checkArticles(self, entry: dict):
        """Check one ``article`` entry; see ``_checkEntry`` for the result.

        'ID', 'number', 'ENTRYTYPE' and 'date' are ignored.
        """
        result = self._checkEntry(
            entry,
            {'ID', 'number', 'ENTRYTYPE', 'date'},
            self.articleImportantFields,
            self.articleExtraFields,
            self.articleCount == 0,
        )
        self.articleCount += 1
        return result

    def checkInbook(self, entry: dict):
        """Check one ``inbook`` entry; see ``_checkEntry`` for the result.

        'ID', 'number', 'ENTRYTYPE', 'date' and 'doi' are ignored.
        """
        result = self._checkEntry(
            entry,
            {'ID', 'number', 'ENTRYTYPE', 'date', 'doi'},
            self.inbookImportantFields,
            self.inbookExtraFields,
            self.inbookCount == 0,
        )
        self.inbookCount += 1
        return result

    def checkTechreport(self, entry: dict):
        """Check one ``techreport`` entry; see ``_checkEntry`` for the result.

        'ID', 'ENTRYTYPE' and 'date' are ignored.
        """
        result = self._checkEntry(
            entry,
            {'ID', 'ENTRYTYPE', 'date'},
            self.techreportImportantFields,
            self.techreportExtraFields,
            self.techreportCount == 0,
        )
        self.techreportCount += 1
        return result

    def checkInproceedings(self, entry: dict):
        """Check one ``inproceedings`` entry; see ``_checkEntry`` for the result.

        'ID', 'ENTRYTYPE', 'date' and 'series' are ignored.
        """
        result = self._checkEntry(
            entry,
            {'ID', 'ENTRYTYPE', 'date', 'series'},
            self.inproceedingsImportantFields,
            self.inproceedingsExtraFields,
            self.inproceedingscount == 0,
        )
        self.inproceedingscount += 1
        return result
def checkMisc():
    """Placeholder for validating ``misc`` entries; currently does nothing."""
    return None
def bibtex_to_dict_list(bibtex_string):
    """Parse a BibTeX string and return its entries as a list of plain dicts."""
    parsed = bibtexparser.loads(bibtex_string)
    # Copy each parsed entry into a fresh dict so callers can add keys freely.
    records = []
    for record in parsed.entries:
        records.append(dict(record))
    return records
def referenceErrorParser(bibtex_string: str, coordinateStr: str):
    """Validate every BibTeX entry, attach its position, and return JSON.

    Args:
        bibtex_string: BibTeX source text containing the references.
        coordinateStr: JSON text whose ``refBibs`` list supplies, by index,
            the ``pos`` value copied onto each parsed entry.

    Returns:
        A JSON string of the entry dicts. Checked entries gain
        ``asterikError`` and ``consistencyError`` lists, ``misc`` entries gain
        a ``warningMessage``, and every entry gains ``pos``.
    """
    print("Bibtex: \n \n"+bibtex_string, '\n', "Coordinates: \n \n"+coordinateStr)
    data = json.loads(coordinateStr)
    checker = ConsistencyHandler()
    entries = bibtex_to_dict_list(bibtex_string)

    # Entry types that have a checker. 'techreport' is deliberately absent
    # (its branch is disabled), so such entries fall through to the final
    # "has to be added" message below.
    validators = {
        "article": checker.checkArticles,
        "inbook": checker.checkInbook,
        "inproceedings": checker.checkInproceedings,
    }

    for entry in entries:
        entryType = entry['ENTRYTYPE']
        validate = validators.get(entryType)
        if validate is not None:
            missing, inconsistent = validate(entry)
            entry['asterikError'] = list(missing)
            entry['consistencyError'] = list(inconsistent)
        elif entryType == "misc":
            # misc entries are not checked; only a generic warning is attached
            entry['warningMessage'] = "For miscellaneous fields, certain key details must be carefully included when citing, such as the year, volume, author, publisher, company or organization, and the link (especially when citing a website), among others."
        else:
            print(f"{entry['ENTRYTYPE']} has to be added for processing.")

    # Pair each entry with the coordinate record at the same index.
    for index, entry in enumerate(entries):
        entry['pos'] = data['refBibs'][index]['pos']

    return json.dumps(entries)
def create_interface():
    """Build and return the Gradio Blocks UI for the reference validator.

    The layout is a heading, two text areas (passed, in order, as the two
    arguments of ``referenceErrorParser``), a button, and a labelled text
    area that receives the function's result.
    """
    with gr.Blocks(title="BibTex Analyser") as interface:
        gr.Markdown("# Reference Validation")

        with gr.Row():
            bibtex_box = gr.TextArea()

        with gr.Row():
            coordinates_box = gr.TextArea()

        with gr.Row():
            run_button = gr.Button("Get Corrections")

        with gr.Row():
            output_box = gr.TextArea(label="Analysis Results", show_label=True)

        # Wire the button: both text areas in, the results area out.
        run_button.click(
            fn=referenceErrorParser,
            inputs=[bibtex_box, coordinates_box],
            outputs=output_box,
        )

    return interface
if __name__ == "__main__":
    # Script entry point: build the UI, then start serving it.
    interface = create_interface()
    interface.launch(share=False)  # Set to False in production