Spaces:

AhmedEwis
/

threads_data_to_excel

Runtime error

threads_data_to_excel / app.py

Update app.py

79742e4 over 2 years ago

1.68 kB

	# -*- coding: utf-8 -*-
	"""Untitled60.ipynb

	Automatically generated by Colaboratory.

	Original file is located at
	https://colab.research.google.com/drive/1if8bXKbgk5lh_oSOZ6Vf6dZ8m7MZ1B-p
	"""

	import gradio as gr
	import pandas as pd
	from PyPDF2 import PdfReader
	import re
	import os

	def process_pdf(file):
	    """Extract follower usernames from a Threads PDF export into an Excel file.

	    Every page of the PDF is read line by line. Lines are ignored until the
	    marker "Accounts that follow you in Threads" has been seen; after that,
	    each non-blank line is kept as a username unless it contains 'AM' or
	    'PM' (timestamp lines) or looks like a file path / URL.

	    Args:
	        file: The uploaded PDF, either as an object exposing the path via a
	            ``name`` attribute or as a plain path string.

	    Returns:
	        The name of the Excel workbook that was written
	        ("Followers_output.xlsx", created in the current working directory)
	        with a single 'Username' column.

	    Raises:
	        ValueError: If no file was supplied.
	    """
	    if file is None:
	        raise ValueError("No PDF file was uploaded.")

	    # The upload may arrive as an object carrying its path in ``.name`` or
	    # as the path string itself; accept both.
	    pdf_path = getattr(file, "name", file)
	    pdf = PdfReader(pdf_path)

	    # A "file path" line is anything with a slash followed later by a dot
	    # (e.g. "file:///C:/Users/me/followers.html"). The leading ".*" is needed
	    # because the pattern is applied with match(), which anchors at the
	    # start of the line.
	    file_path_pattern = re.compile(r'.*/.*\..*$')

	    data = []

	    # The marker appears once; the flag stays set across page boundaries.
	    start_collecting = False

	    for page in pdf.pages:
	        # Pages without a text layer can yield nothing; treat them as empty.
	        text = page.extract_text() or ""

	        for line in text.split('\n'):
	            if "Accounts that follow you in Threads" in line:
	                start_collecting = True
	                continue

	            if not start_collecting:
	                continue

	            username = line.strip()
	            if not username:
	                continue
	            if 'AM' in line or 'PM' in line:
	                continue
	            if file_path_pattern.match(line):
	                continue

	            data.append(username)

	    # One row per username, no index column in the workbook.
	    df = pd.DataFrame(data, columns=['Username'])

	    output_file = "Followers_output.xlsx"
	    df.to_excel(output_file, index=False)
	    return output_file


	# Define the Gradio interface: one PDF upload in, one downloadable Excel
	# file out. The output uses gr.File rather than the legacy
	# gr.outputs.File, which is not available in current Gradio releases.
	iface = gr.Interface(
	    fn=process_pdf,
	    inputs="file",
	    outputs=gr.File(label="Output Excel"),
	    title="PDF threads data to Excel",
	    description="Hi Humood! Just Upload the PDF file of threads data and get an Excel file with usernames.",
	)

	# Start the web server for the app.
	iface.launch()