# pv / app.py
# prudhviLatha's picture
# Update app.py
# 71749cd verified
import os
import logging
import gradio as gr
import pandas as pd
import numpy as np
from datetime import datetime
from PyPDF2 import PdfReader
import matplotlib.pyplot as plt
from simple_salesforce import Salesforce
# Setup logging: configure the root logger once at import time so that the
# logging.info/logging.error calls in this module are emitted at INFO level
# and above.
logging.basicConfig(level=logging.INFO)
# Function to extract text from PDF using PyPDF2
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Args:
        pdf_path: Filesystem path of the PDF file to read.

    Returns:
        The text of all pages joined in page order. This function never
        raises: on any failure it logs the exception and returns a string
        that starts with ``"Error extracting text from PDF: "``.
    """
    try:
        with open(pdf_path, "rb") as file:
            pdf = PdfReader(file)
            # Guard against a page whose extraction yields None (or an empty
            # result) so that a single page cannot abort the whole document,
            # and join once instead of growing a string page by page.
            return "".join(page.extract_text() or "" for page in pdf.pages)
    except Exception as e:
        # Keep the historical "error string" contract for callers, but make
        # sure the failure is also visible in the logs with its traceback.
        logging.exception("Failed to extract text from PDF %r", pdf_path)
        return f"Error extracting text from PDF: {str(e)}"
# Salesforce connection
# Credentials are read from the SF_USERNAME, SF_PASSWORD and SF_SECURITY_TOKEN
# environment variables. os.getenv() takes the *name* of a variable, so the
# secret values themselves must never appear in this file.
# On any failure the module still imports: `sf` is None and `sf_connected` is
# False, and the reason is logged.
try:
    sf_username = os.getenv("SF_USERNAME")
    sf_password = os.getenv("SF_PASSWORD")
    sf_security_token = os.getenv("SF_SECURITY_TOKEN")
    if not all([sf_username, sf_password, sf_security_token]):
        raise ValueError("Salesforce credentials (SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN) are missing!")
    sf = Salesforce(username=sf_username, password=sf_password, security_token=sf_security_token)
    sf_connected = True
    logging.info("✅ Connected to Salesforce.")
except Exception as e:
    # Best-effort connection: the rest of the app must keep working without
    # Salesforce, so record the failure instead of propagating it.
    sf = None
    sf_connected = False
    logging.error(f"❌ Salesforce connection failed: {e}")
# Simple forecasting model (Moving Average)
def simple_forecast(history, horizon=3, window=3):
    """Forecast daily headcount with a trailing moving average.

    The forecast is flat: every future day gets the mean of the last
    *window* observations (fewer if the history is shorter).

    Args:
        history: Records with a ``"ds"`` date and a numeric ``"y"`` headcount,
            in any form accepted by ``pandas.DataFrame`` (e.g. a list of
            dicts). Records may be given in any order.
        horizon: Number of future days to predict. Defaults to 3.
        window: Size of the moving-average window. Defaults to 3.

    Returns:
        A ``(predictions, plot_data)`` tuple. ``predictions`` is a list of
        ``{"date": "YYYY-MM-DD", "headcount": int}`` dicts, one per future
        day. ``plot_data`` is a dict with parallel lists ``"dates"``,
        ``"headcount"`` and ``"is_forecast"`` covering the history (sorted by
        date) followed by the forecast.

    Raises:
        ValueError: If *history* is empty or lacks the ``"ds"``/``"y"``
            fields, or if *horizon* is negative or *window* is below 1.
    """
    if horizon < 0:
        raise ValueError("horizon must be non-negative")
    if window < 1:
        raise ValueError("window must be at least 1")

    # Convert history to DataFrame
    df = pd.DataFrame(history)
    if df.empty or 'ds' not in df.columns or 'y' not in df.columns:
        raise ValueError("history must contain at least one record with 'ds' and 'y'")

    df['ds'] = pd.to_datetime(df['ds'])
    # Order by date so that the rolling window and iloc[-1] really refer to
    # the most recent observations, whatever order the caller supplied.
    df = df.sort_values('ds', kind='stable').reset_index(drop=True)

    # Simple moving average to predict the next value
    df['yhat'] = df['y'].rolling(window=window, min_periods=1).mean()

    # date_range includes the start date itself, so ask for one extra period
    # and drop the first entry to get strictly future days.
    future_dates = pd.date_range(df['ds'].max(), periods=horizon + 1, freq='D')[1:]
    future_preds = np.repeat(df['yhat'].iloc[-1], horizon)

    # Prepare predictions in the required format
    predictions = [
        {"date": day.strftime('%Y-%m-%d'), "headcount": int(round(value))}
        for day, value in zip(future_dates, future_preds)
    ]

    # Prepare data for plotting (historical + forecast)
    plot_data = {
        'dates': df['ds'].tolist() + list(future_dates),
        'headcount': df['y'].tolist() + list(future_preds),
        'is_forecast': [False] * len(df) + [True] * horizon,
    }
    return predictions, plot_data
# Function to create a forecast chart
def create_forecast_chart(plot_data, chart_path='forecast_chart.png'):
    """Render historical and forecast headcount to an image file.

    Args:
        plot_data: Dict with parallel sequences ``'dates'``, ``'headcount'``
            and ``'is_forecast'``; entries whose flag is true are drawn as
            the forecast series, the rest as the historical series.
        chart_path: Where to save the image. Defaults to
            ``'forecast_chart.png'`` in the current working directory; pass a
            distinct path per call if several charts must coexist.

    Returns:
        *chart_path*, after the figure has been written to it.
    """
    points = list(zip(plot_data['dates'], plot_data['headcount'], plot_data['is_forecast']))
    # Separate historical and forecast data
    historical = [(day, count) for day, count, is_forecast in points if not is_forecast]
    forecast = [(day, count) for day, count, is_forecast in points if is_forecast]

    # Work on an explicit Figure/Axes and close it in `finally`, so the figure
    # is released even when plotting or saving raises.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        # Plot historical data
        if historical:
            hist_dates, hist_values = zip(*historical)
            ax.plot(hist_dates, hist_values, 'b-', label='Historical Headcount')
            ax.scatter(hist_dates, hist_values, color='blue', s=50)
        # Plot forecast data
        if forecast:
            fore_dates, fore_values = zip(*forecast)
            ax.plot(fore_dates, fore_values, 'r--', label='Forecasted Headcount')
            ax.scatter(fore_dates, fore_values, color='red', s=50)

        ax.set_title('Labour Headcount Forecast')
        ax.set_xlabel('Date')
        ax.set_ylabel('Headcount')
        ax.grid(True)
        # Only build a legend when at least one labelled series was drawn.
        if historical or forecast:
            ax.legend()
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()

        # Save the plot
        fig.savefig(chart_path)
    finally:
        plt.close(fig)
    return chart_path
# Function to process the PDF and display the desired output format
def forecast_labour(pdf_file):
    """Handle an uploaded PDF and return the forecast payload plus chart path.

    Args:
        pdf_file: The uploaded file, either as an object exposing the file
            path through ``.name`` or as a plain path string. ``None`` means
            nothing was uploaded.

    Returns:
        A ``(result, chart_path)`` tuple. On success ``result`` is a dict with
        the keys ``"Title"``, ``"Date"``, ``"Trade"``, ``"Weather"`` and
        ``"Forecast"``, and ``chart_path`` is the path of the rendered chart.
        On failure ``result`` is ``{"error": "..."}`` and ``chart_path`` is
        ``None``; this function does not raise.
    """
    if pdf_file is None:
        # Give the user an actionable message instead of an AttributeError text.
        return {"error": "Error processing the PDF: no file was uploaded"}, None
    try:
        # Accept both a file-like wrapper (path in .name) and a bare path string.
        pdf_path = getattr(pdf_file, "name", pdf_file)

        # Extract text from the uploaded PDF
        extracted_text = extract_text_from_pdf(pdf_path)
        logging.info("Extracted Text (first 500 chars): %s", extracted_text[:500])

        # Simulate historical data extraction (replace with real extraction logic)
        # NOTE: the extracted text is not used yet; the history below is a
        # fixed placeholder, as are the Date/Trade/Weather fields in `result`.
        history = [{"ds": "2025-05-01", "y": 50}, {"ds": "2025-05-02", "y": 48}, {"ds": "2025-05-03", "y": 52}]

        # Apply the simple forecasting model
        predictions, plot_data = simple_forecast(history)

        # Create the forecast chart
        chart_path = create_forecast_chart(plot_data)

        # Format the result
        result = {
            "Title": "Labour Attendance Data",
            "Date": "May 2025",
            "Trade": "Electrician",
            "Weather": "Sunny",
            "Forecast": predictions,
        }
        return result, chart_path
    except Exception as e:
        # UI boundary: report the problem to the user and keep the traceback
        # in the logs rather than crashing the request.
        logging.exception("Error processing the PDF")
        return {"error": f"Error processing the PDF: {str(e)}"}, None
# Gradio interface to accept PDF and return a formatted output with chart
def gradio_interface():
    """Assemble the Gradio UI around ``forecast_labour`` and launch it.

    The interface takes one uploaded file and shows two outputs: the forecast
    result as JSON and the forecast chart as an image.
    """
    pdf_upload = gr.File(label="Upload PDF")
    forecast_json = gr.JSON(label="Forecast Result")
    forecast_image = gr.Image(label="Forecast Chart")

    app = gr.Interface(
        fn=forecast_labour,
        inputs=[pdf_upload],
        outputs=[forecast_json, forecast_image],
        title="Labour Attendance Forecast",
        description="This app forecasts labour shortages based on attendance logs, trends, and other factors extracted from uploaded PDFs. It also displays a chart of historical and forecasted headcount.",
        theme="default",  # Use the default Gradio theme
    )
    app.launch()
# Entry point: launch the Gradio app only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    gradio_interface()