Spaces:

mkoot007
/

Classification-testing

Runtime error

App Files Files Community

Classification-testing / app.py

mkoot007

Update app.py

80de488 over 2 years ago

raw

history blame

1.92 kB

	import gradio as gr
	import re
	from docx import Document
	from PyPDF2 import PdfReader # Use PdfReader from PyPDF2

	# Function to extract text from a PDF file
	def extract_text_from_pdf(pdf_file):
	    """Return the text of every page of a PDF, concatenated in page order.

	    Args:
	        pdf_file: The PDF to read. It is passed unchanged to ``PdfReader``.

	    Returns:
	        One string made of each page's ``extract_text()`` result, joined
	        with no separator.

	    Raises:
	        ValueError: If the PDF contains no pages.
	    """
	    pages = PdfReader(pdf_file).pages

	    if not pages:
	        raise ValueError("The PDF file is empty.")

	    # Join once instead of growing a string with += on every page.
	    return "".join(page.extract_text() for page in pages)


	# Function to extract text from a DOCX file
	def extract_text_from_docx(docx_file):
	    """Return the text of a DOCX file, one paragraph per line.

	    Args:
	        docx_file: The document to read. It is passed unchanged to
	            ``Document``.

	    Returns:
	        The ``text`` of every entry in the document's ``paragraphs``,
	        joined with newline characters.
	    """
	    doc = Document(docx_file)
	    return "\n".join(para.text for para in doc.paragraphs)
	# Function to extract information from a resume
	def extract_info_from_resume(resume_path):
	    """Extract a name, an email address and a phone number from a resume.

	    Args:
	        resume_path: The resume to read. Either an object that exposes the
	            file name as ``.name`` or a plain path string. It is handed
	            unchanged to the PDF or DOCX text extractor.

	    Returns:
	        A dict with the keys ``"Name"``, ``"Email"`` and ``"Phone"``. Each
	        value is the first match of the corresponding regular expression
	        in the extracted text, or a "... not found" placeholder string.

	    Raises:
	        ValueError: If no file is given, or the file name does not end in
	            ``.pdf`` or ``.docx`` (compared case-insensitively).
	    """
	    if resume_path is None:
	        raise ValueError("No file was provided.")

	    # Accept both upload objects (name in `.name`) and bare path strings,
	    # and lower-case the name so "CV.PDF" is treated like "cv.pdf".
	    filename = str(getattr(resume_path, "name", resume_path)).lower()

	    if filename.endswith(".pdf"):
	        text = extract_text_from_pdf(resume_path)
	    elif filename.endswith(".docx"):
	        text = extract_text_from_docx(resume_path)
	    else:
	        raise ValueError("Unsupported file format. Only PDF and DOCX are supported.")

	    # Define regular expressions to extract information
	    # Name: two or more capitalised words separated by single spaces.
	    name_pattern = r"([A-Z][a-z]+(?: [A-Z][a-z]+)+)"
	    email_pattern = r"[\w\.-]+@[\w\.-]+"
	    # Phone: 3-3-4 digit groups, optional parentheses and -, . or space.
	    phone_pattern = r"(\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4})"

	    name = re.search(name_pattern, text)
	    email = re.search(email_pattern, text)
	    phone = re.search(phone_pattern, text)

	    return {
	        "Name": name.group() if name else "Name not found",
	        "Email": email.group() if email else "Email not found",
	        "Phone": phone.group() if phone else "Phone number not found",
	    }

	# Define a Gradio interface
	# Web UI: one uploaded file in, the extracted fields out as JSON.
	# NOTE(review): `gr.inputs` looks like Gradio's older component namespace;
	# confirm it exists in the Gradio version this app is deployed with.
	iface = gr.Interface(
	    fn=extract_info_from_resume,
	    inputs=gr.inputs.File(type="file"),
	    outputs="json",
	)

	# Deploy the Gradio interface
	# Launch only when run as a script, so importing this module (for example
	# to reuse the extraction functions) does not start a web server.
	if __name__ == "__main__":
	    iface.launch(share=True)