Spaces:
Runtime error
Runtime error
| from gliner import GLiNER | |
| import re | |
| import fitz | |
| import gradio as gr | |
# Load the GLiNER checkpoint once at import time; `ner` below reuses this
# module-level instance for every prediction.
model = GLiNER.from_pretrained(
    "gliner-community/gliner_large-v2.5",
    load_tokenizer=True,
)
def clean_text(text: str) -> str:
    """Normalize extracted text into a single line of printable characters.

    Steps, in order:

    1. NFKC-normalize, so compatibility characters common in PDF output
       (ligatures such as U+FB01, non-breaking spaces) become their plain
       equivalents instead of being lost.
    2. Turn every whitespace character (newlines, tabs, Unicode spaces)
       into an ordinary space, so adjacent words are never glued together.
    3. Drop characters that are not printable (control and format
       characters). Printable non-ASCII text such as accented letters is
       kept.
    4. Collapse runs of spaces and strip both ends.

    Args:
        text: Raw text, e.g. as extracted from a PDF.

    Returns:
        The cleaned text; an empty string if nothing printable remains.
    """
    # Local import: the module's import block does not provide unicodedata.
    import unicodedata

    normalized = unicodedata.normalize("NFKC", text)
    # Whitespace must become a space *before* the printable filter runs,
    # otherwise separators like "\xa0" would be deleted and merge words.
    spaced = re.sub(r"\s", " ", normalized)
    printable = "".join(ch for ch in spaced if ch.isprintable())
    # split()/join collapses repeated spaces and trims the ends in one pass.
    return " ".join(printable.split())
def pdf2text(file_path):
    """Return the cleaned text of every page of the document at ``file_path``.

    The document is opened with ``fitz.open``; the ``get_text()`` output of
    each page is concatenated in iteration order and the result is passed
    through ``clean_text``.
    """
    with fitz.open(file_path) as pdf:
        # Pages must be read while the document is still open.
        raw_text = "".join(page.get_text() for page in pdf)
    return clean_text(raw_text)
def ner(text: str, labels: str, threshold: float) -> dict:
    """Extract entities from ``text`` for a comma-separated list of labels.

    Args:
        text: Text to scan for entities.
        labels: Comma-separated label names, e.g. ``"name, email, skills"``.
            Whitespace around each label is ignored and empty items (an
            empty box, a trailing comma) are discarded.
        threshold: Threshold forwarded to ``model.predict_entities``.

    Returns:
        A dict with the input under ``"text"`` and a list under
        ``"entities"``. Each entity dict carries ``"entity"`` (the label),
        ``"word"`` (the matched text), ``"start"`` and ``"end"`` as reported
        by the model, and ``"score"``, which is always ``0``. The list is
        empty when ``text`` is blank or no usable label was supplied; the
        model is not called in that case.
    """
    stripped = (label.strip() for label in labels.split(","))
    labels = [label for label in stripped if label]
    print(labels)

    # Nothing to predict: avoid sending empty text or empty labels to the model.
    if not labels or not text or text.isspace():
        return {"text": text, "entities": []}

    predictions = model.predict_entities(
        text, labels, flat_ner=True, threshold=threshold
    )
    return {
        "text": text,
        "entities": [
            {
                "entity": entity["label"],
                "word": entity["text"],
                "start": entity["start"],
                "end": entity["end"],
                # The model's own score is intentionally not forwarded here.
                "score": 0,
            }
            for entity in predictions
        ],
    }
def parser(file_path, labels, threshold):
    """Extract the text of the PDF at ``file_path`` and run ``ner`` on it.

    ``labels`` and ``threshold`` are forwarded to ``ner`` unchanged; the
    return value is whatever ``ner`` returns.
    """
    return ner(pdf2text(file_path), labels, threshold)
# Stylesheet handed to gr.Blocks(css=...) below: page background and font,
# a padded ".container" card, a centered heading, and centering for the
# element with id "file_upload".
custom_css = """
body {
background-color: #f0f8ff;
font-family: 'Arial', sans-serif;
}
.container {
margin: auto;
padding: 20px;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
h1 {
color: #3d1ad9;
text-align: center;
}
#file_upload {
display: flex;
justify-content: center;
margin-bottom: 20px;
}
"""
# Build the UI: heading and intro, a row with the label box and threshold
# slider, the PDF upload, the parse button, and the highlighted output.
# NOTE(review): nesting below is reconstructed; confirm which statements sit
# inside the final gr.Row() against the original layout.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML("<h1>AI-Powered Resume Parser</h1>")
    gr.HTML("<p style='text-align: center;'>This application extracts important data from your resume using innovative NLP methods. This tool's key advantage is that, in contrast to conventional resume parsers, it is generalized(Thanks to GLiNER team), meaning it functions in accordance with your needs. Simply enter the labels (NER) that you wish to extract, then adjust the threshold and submit the resume. Magic will happen in a few seconds.</p>")

    with gr.Row() as row:
        labels = gr.Textbox(
            label="Labels",
            placeholder="Enter your labels here (comma separated)",
            scale=2,
        )
        threshold = gr.Slider(
            minimum=0,
            maximum=1,
            value=0.3,
            step=0.01,
            label="Threshold",
            info="Lower the threshold to increase how many entities get predicted.",
            scale=0,
        )

    with gr.Row():
        file_input = gr.File(
            label="Upload Resume",
            file_types=[".pdf"],
            elem_id="file_upload",
        )

    with gr.Row():
        parse_button = gr.Button("Parse Resume")

    with gr.Row():
        output = gr.HighlightedText(
            label="Parsed Resume",
            combine_adjacent=True,
        )

    # Clicking the button sends (file, labels, threshold) to `parser` and
    # renders its return value in the highlighted-text component.
    parse_button.click(
        fn=parser,
        inputs=[file_input, labels, threshold],
        outputs=output,
    )

    gr.HTML("<p style='text-align: center;'>Our resume parser can identify and extract important details such as personal information, education, work experience, skills, and more. Simply upload your resume and let our AI do the work!</p>")
# Enable queuing on the app, then start it with the share and debug
# options switched on.
demo.queue().launch(share=True, debug=True)