Spaces:

mkoot007
/

Classification-testing

Runtime error

App Files Files Community

mkoot007 committed on Nov 7, 2023

Commit

73f7377

1 Parent(s): 80de488

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -60

app.py CHANGED Viewed

@@ -1,63 +1,11 @@
-import gradio as gr
-import re
-from docx import Document
-from PyPDF2 import PdfReader  # Use PdfReader from PyPDF2
-# Function to extract text from a PDF file
-def extract_text_from_pdf(pdf_file):
-    text = ""
-    pdf = PdfReader(pdf_file)
-    for page in pdf.pages:
-        text += page.extract_text()
-    return text
-# Function to extract text from a DOCX file
-def extract_text_from_docx(docx_file):
-    doc = Document(docx_file)
-    text = "\n".join([para.text for para in doc.paragraphs])
-    return text
-def extract_text_from_pdf(pdf_file):
-    text = ""
-    pdf = PdfReader(pdf_file)
-    if not pdf.pages:
-        raise ValueError("The PDF file is empty.")
-    for page in pdf.pages:
-        text += page.extract_text()
-    return text
-# Function to extract information from a resume
-def extract_info_from_resume(resume_path):
-    if resume_path.name.endswith('.pdf'):
-        text = extract_text_from_pdf(resume_path)
-    elif resume_path.name.endswith('.docx'):
-        text = extract_text_from_docx(resume_path)
-    else:
-        raise ValueError("Unsupported file format. Only PDF and DOCX are supported.")
-    # Define regular expressions to extract information
-    name_pattern = r"([A-Z][a-z]+(?: [A-Z][a-z]+)+)"
-    email_pattern = r"[\w\.-]+@[\w\.-]+"
-    phone_pattern = r"(\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4})"
-    name = re.search(name_pattern, text)
-    email = re.search(email_pattern, text)
-    phone = re.search(phone_pattern, text)
-    extracted_info = {
-        "Name": name.group() if name else "Name not found",
-        "Email": email.group() if email else "Email not found",
-        "Phone": phone.group() if phone else "Phone number not found",
-    }
-    return extracted_info
-# Define a Gradio interface
-iface = gr.Interface(
-    fn=extract_info_from_resume,
-    inputs=gr.inputs.File(type="file"),
-    outputs="json"
-)
-# Deploy the Gradio interface
-iface.launch(share=True)

+from transformers import AutoTokenizer, AutoModelForTokenClassification
+from transformers import pipeline
+tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
+model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
+nlp = pipeline("ner", model=model, tokenizer=tokenizer)
+example = "My name is Wolfgang and I live in Berlin"
+ner_results = nlp(example)
+print(ner_results)