Spaces:

prudhviLatha
/

pv

Sleeping

App Files Files Community

pv / app.py

prudhviLatha

Update app.py

71749cd verified 11 months ago

raw

history blame contribute delete

5.36 kB

	import os
	import logging
	import gradio as gr
	import pandas as pd
	import numpy as np
	from datetime import datetime
	from PyPDF2 import PdfReader
	import matplotlib.pyplot as plt
	from simple_salesforce import Salesforce

	# Configure the root logger at INFO level so the Salesforce connection
	# status messages logged further down in this module are actually emitted.
	logging.basicConfig(level=logging.INFO)

	# Extract the text layer of a PDF using PyPDF2
	def extract_text_from_pdf(pdf_path):
	    """Return the concatenated text of every page of the PDF at *pdf_path*.

	    Args:
	        pdf_path: Filesystem path of the PDF to read.

	    Returns:
	        The page texts joined in page order. On any failure (missing file,
	        unreadable PDF, ...) nothing is raised; instead a string starting
	        with "Error extracting text from PDF: " is returned, so callers
	        that need to distinguish failure must check for that prefix.
	    """
	    try:
	        with open(pdf_path, "rb") as file:
	            pdf = PdfReader(file)
	            # Defensive: treat a page that yields no text as an empty string
	            # so one image-only page cannot break the concatenation.
	            return "".join(page.extract_text() or "" for page in pdf.pages)
	    except Exception as e:
	        # Keep the best-effort contract (error text instead of raising) but
	        # record the traceback so the failure is visible in the logs.
	        logging.exception("PDF text extraction failed for %s", pdf_path)
	        return f"Error extracting text from PDF: {str(e)}"

	# Salesforce connection, configured entirely from environment variables.
	# SECURITY: os.getenv() takes the *name* of a variable, never the secret
	# itself. Credentials must not appear in this file; any secret that was
	# ever committed here should be treated as compromised and rotated.
	try:
	    sf_username = os.getenv("SF_USERNAME")
	    sf_password = os.getenv("SF_PASSWORD")
	    sf_security_token = os.getenv("SF_SECURITY_TOKEN")

	    if not all([sf_username, sf_password, sf_security_token]):
	        raise ValueError("Salesforce credentials (SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN) are missing!")

	    sf = Salesforce(username=sf_username, password=sf_password, security_token=sf_security_token)
	    sf_connected = True
	    logging.info("✅ Connected to Salesforce.")
	except Exception as e:
	    # Best-effort: a missing or failed connection must not stop the module
	    # from loading, so record the failure and expose sf=None instead.
	    sf = None
	    sf_connected = False
	    logging.error("❌ Salesforce connection failed: %s", e)

	# Simple forecasting model (trailing moving average)
	def simple_forecast(history, horizon=3, window=3):
	    """Forecast daily headcount by repeating the latest moving average.

	    Args:
	        history: Non-empty sequence of records, each with a "ds" key
	            (anything pandas can parse as a date) and a "y" key
	            (the observed headcount). Order does not matter.
	        horizon: Number of future days to forecast (default 3).
	        window: Size of the trailing moving-average window (default 3).

	    Returns:
	        A ``(predictions, plot_data)`` tuple. ``predictions`` is a list of
	        ``{"date": "YYYY-MM-DD", "headcount": int}`` dicts, one per
	        forecast day. ``plot_data`` holds the parallel lists "dates",
	        "headcount" and "is_forecast" covering history plus forecast.

	    Raises:
	        ValueError: If *history* is empty or lacks the "ds"/"y" keys, or
	            if *horizon* or *window* is smaller than 1.
	    """
	    if horizon < 1 or window < 1:
	        raise ValueError("horizon and window must be positive integers")

	    df = pd.DataFrame(history)
	    if df.empty or not {"ds", "y"}.issubset(df.columns):
	        raise ValueError("history must contain at least one record with 'ds' and 'y' keys")

	    df["ds"] = pd.to_datetime(df["ds"])
	    # Sort chronologically so the last row really is the most recent day;
	    # both the rolling mean and the iloc[-1] lookup below depend on it.
	    df = df.sort_values("ds", kind="stable").reset_index(drop=True)

	    # min_periods=1 lets short histories still produce an average.
	    df["yhat"] = df["y"].rolling(window=window, min_periods=1).mean()

	    # Forecast days start the day after the latest observation.
	    first_future_day = df["ds"].max() + pd.Timedelta(days=1)
	    future_dates = pd.date_range(first_future_day, periods=horizon, freq="D")
	    # Flat forecast: every future day gets the latest moving average.
	    future_preds = np.repeat(df["yhat"].iloc[-1], horizon)

	    predictions = [
	        {"date": day.strftime("%Y-%m-%d"), "headcount": int(round(value))}
	        for day, value in zip(future_dates, future_preds)
	    ]

	    # Historical and forecast points in one series, flagged for plotting.
	    plot_data = {
	        "dates": df["ds"].tolist() + list(future_dates),
	        "headcount": df["y"].tolist() + list(future_preds),
	        "is_forecast": [False] * len(df) + [True] * horizon,
	    }

	    return predictions, plot_data

	# Render historical and forecast headcount as a PNG chart
	def create_forecast_chart(plot_data, chart_path='forecast_chart.png'):
	    """Plot historical and forecast headcount and save the chart as an image.

	    Args:
	        plot_data: Mapping with the parallel sequences 'dates',
	            'headcount' and 'is_forecast'; entries whose flag is truthy
	            are drawn as forecast, the rest as history.
	        chart_path: Where to write the image. Defaults to
	            'forecast_chart.png' in the current working directory.

	    Returns:
	        The path the chart was written to (``chart_path``).
	    """
	    # Split the series into its two groups in a single pass.
	    historical, forecast = [], []
	    for date, headcount, is_forecast in zip(
	        plot_data['dates'], plot_data['headcount'], plot_data['is_forecast']
	    ):
	        (forecast if is_forecast else historical).append((date, headcount))

	    # Use an explicit figure/axes pair instead of pyplot's implicit
	    # "current figure", so this function does not depend on global state.
	    fig, ax = plt.subplots(figsize=(10, 6))
	    try:
	        if historical:
	            hist_dates, hist_values = zip(*historical)
	            ax.plot(hist_dates, hist_values, 'b-', label='Historical Headcount')
	            ax.scatter(hist_dates, hist_values, color='blue', s=50)

	        if forecast:
	            fore_dates, fore_values = zip(*forecast)
	            ax.plot(fore_dates, fore_values, 'r--', label='Forecasted Headcount')
	            ax.scatter(fore_dates, fore_values, color='red', s=50)

	        ax.set_title('Labour Headcount Forecast')
	        ax.set_xlabel('Date')
	        ax.set_ylabel('Headcount')
	        ax.grid(True)
	        # A legend without any labelled series only triggers a warning.
	        if historical or forecast:
	            ax.legend()
	        ax.tick_params(axis='x', labelrotation=45)
	        fig.tight_layout()

	        fig.savefig(chart_path)
	    finally:
	        # Always release the figure, even if plotting or saving failed.
	        plt.close(fig)

	    return chart_path

	# Process an uploaded PDF and build the forecast payload plus its chart
	def forecast_labour(pdf_file):
	    """Gradio handler: turn an uploaded PDF into a forecast and a chart.

	    Args:
	        pdf_file: The upload handed over by the ``gr.File`` input; either
	            an object exposing the file path as ``.name`` or the path
	            itself. ``None`` means nothing was uploaded.

	    Returns:
	        ``(result, chart_path)`` on success, where ``result`` is the
	        JSON-serialisable summary including the "Forecast" list and
	        ``chart_path`` is the saved chart image. On any failure
	        ``({"error": "..."}, None)`` is returned instead of raising.
	    """
	    if pdf_file is None:
	        return {"error": "Error processing the PDF: no file was uploaded"}, None

	    try:
	        # NOTE(review): depending on the Gradio version/configuration the
	        # upload arrives as a file-like object with .name or as a plain
	        # path string; accept both.
	        pdf_path = getattr(pdf_file, "name", pdf_file)

	        extracted_text = extract_text_from_pdf(pdf_path)
	        logging.info("Extracted Text (first 500 chars): %s", extracted_text[:500])

	        # Simulated historical data: the extracted text is not parsed yet
	        # (replace with real extraction logic).
	        history = [
	            {"ds": "2025-05-01", "y": 50},
	            {"ds": "2025-05-02", "y": 48},
	            {"ds": "2025-05-03", "y": 52},
	        ]

	        predictions, plot_data = simple_forecast(history)
	        chart_path = create_forecast_chart(plot_data)

	        # Metadata fields are placeholders that match the simulated history.
	        result = {
	            "Title": "Labour Attendance Data",
	            "Date": "May 2025",
	            "Trade": "Electrician",
	            "Weather": "Sunny",
	            "Forecast": predictions,
	        }

	        return result, chart_path

	    except Exception as e:
	        # UI boundary: report the problem in the JSON panel and keep the
	        # traceback in the logs rather than crashing the request.
	        logging.exception("Forecast request failed")
	        return {"error": f"Error processing the PDF: {str(e)}"}, None

	# Build the Gradio UI (PDF upload in, JSON result and chart image out) and serve it
	def gradio_interface():
	    """Assemble the upload interface around ``forecast_labour`` and launch it."""
	    upload_input = gr.File(label="Upload PDF")
	    json_output = gr.JSON(label="Forecast Result")
	    chart_output = gr.Image(label="Forecast Chart")

	    demo = gr.Interface(
	        fn=forecast_labour,
	        inputs=[upload_input],
	        outputs=[json_output, chart_output],
	        title="Labour Attendance Forecast",
	        description="This app forecasts labour shortages based on attendance logs, trends, and other factors extracted from uploaded PDFs. It also displays a chart of historical and forecasted headcount.",
	        theme="default",  # stock Gradio theme
	    )
	    demo.launch()

	# Entry point: serve the app only when this file is executed as a script
	if __name__ == "__main__":
	    gradio_interface()