Spaces:
Sleeping
Sleeping
| import os | |
| import logging | |
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| from datetime import datetime | |
| from PyPDF2 import PdfReader | |
| import matplotlib.pyplot as plt | |
| from simple_salesforce import Salesforce | |
# Configure the root logger once at import time with INFO as its threshold.
logging.basicConfig(level=logging.INFO)
# Function to extract text from PDF using PyPDF2
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path``, concatenated.

    Args:
        pdf_path: Filesystem path of the PDF file to read.

    Returns:
        The extracted text of all pages joined in page order. This function
        never raises: on any failure it returns a string of the form
        ``"Error extracting text from PDF: <exception message>"`` instead, so
        callers that need to detect failure must check for that prefix.
    """
    try:
        with open(pdf_path, "rb") as file:
            pdf = PdfReader(file)
            # Defensive: treat a None result from a page as empty text so that
            # one text-less page cannot abort the whole extraction with a
            # TypeError and discard the text of every other page.
            return "".join(page.extract_text() or "" for page in pdf.pages)
    except Exception as e:
        return f"Error extracting text from PDF: {e}"
# Salesforce connection
#
# Credentials are read from the environment variables SF_USERNAME,
# SF_PASSWORD and SF_SECURITY_TOKEN. The argument to os.getenv() is the NAME
# of the variable, never the secret itself.
# SECURITY: real credentials must not appear in source code; any secret that
# has ever been committed to this file should be treated as compromised and
# rotated.
#
# Module-level results:
#   sf            -- connected Salesforce client, or None on failure
#   sf_connected  -- True only when the connection succeeded
try:
    sf_username = os.getenv("SF_USERNAME")
    sf_password = os.getenv("SF_PASSWORD")
    sf_security_token = os.getenv("SF_SECURITY_TOKEN")
    # Unset and empty variables are both treated as missing.
    if not all([sf_username, sf_password, sf_security_token]):
        raise ValueError("Salesforce credentials (SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN) are missing!")
    sf = Salesforce(username=sf_username, password=sf_password, security_token=sf_security_token)
    sf_connected = True
    logging.info("✅ Connected to Salesforce.")
except Exception as e:
    # Best effort: the app keeps running without Salesforce; the failure is
    # logged and recorded in sf_connected.
    sf = None
    sf_connected = False
    logging.error("❌ Salesforce connection failed: %s", e)
# Simple forecasting model (Moving Average)
def simple_forecast(history, horizon=3, window=3):
    """Forecast future headcount with a trailing moving average.

    The forecast is flat: every future day receives the mean of the last
    ``window`` observed values (fewer if the history is shorter).

    Args:
        history: Iterable of records, each with a ``"ds"`` key (anything
            ``pandas.to_datetime`` can parse) and a ``"y"`` key (numeric
            headcount). Records may be given in any order.
        horizon: Number of days to forecast after the latest date in
            ``history``. Defaults to 3.
        window: Size of the moving-average window. Defaults to 3.

    Returns:
        A ``(predictions, plot_data)`` tuple. ``predictions`` is a list of
        ``{"date": "YYYY-MM-DD", "headcount": int}`` dicts, one per forecast
        day. ``plot_data`` is a dict with parallel lists ``"dates"``,
        ``"headcount"`` and ``"is_forecast"`` covering the date-sorted
        history followed by the forecast.

    Raises:
        ValueError: If ``horizon`` is negative, ``window`` is less than 1, or
            ``history`` is empty or lacks the ``"ds"``/``"y"`` keys.
    """
    if horizon < 0:
        raise ValueError("horizon must be non-negative")
    if window < 1:
        raise ValueError("window must be at least 1")

    # Convert history to DataFrame
    df = pd.DataFrame(history)
    if df.empty or not {"ds", "y"}.issubset(df.columns):
        raise ValueError("history must contain at least one record with 'ds' and 'y' keys")

    df["ds"] = pd.to_datetime(df["ds"])
    # Sort chronologically so that "the last row" really is the latest date;
    # otherwise the forecast value and the forecast dates could disagree.
    df = df.sort_values("ds", kind="stable").reset_index(drop=True)

    # Simple moving average to predict the next value
    df["yhat"] = df["y"].rolling(window=window, min_periods=1).mean()

    # date_range starts AT the latest date, so ask for one extra period and
    # drop the first entry to get only the days after it.
    future_dates = pd.date_range(df["ds"].max(), periods=horizon + 1, freq="D")[1:]
    future_preds = np.repeat(df["yhat"].iloc[-1], horizon)

    # Prepare predictions in the required format
    predictions = [
        {"date": day.strftime("%Y-%m-%d"), "headcount": int(round(value))}
        for day, value in zip(future_dates, future_preds)
    ]

    # Prepare data for plotting (historical + forecast)
    plot_data = {
        "dates": df["ds"].tolist() + list(future_dates),
        "headcount": df["y"].tolist() + list(future_preds),
        "is_forecast": [False] * len(df) + [True] * horizon,
    }
    return predictions, plot_data
# Function to create a forecast chart
def create_forecast_chart(plot_data, chart_path='forecast_chart.png'):
    """Render historical and forecast headcount to a PNG and return its path.

    Args:
        plot_data: Dict with three parallel sequences: ``'dates'`` (x values),
            ``'headcount'`` (y values) and ``'is_forecast'`` (truthy for
            forecast points, falsy for historical ones).
        chart_path: Where to write the image. Defaults to
            ``'forecast_chart.png'`` in the current working directory.

    Returns:
        ``chart_path``, after the figure has been saved there.
    """
    # Separate historical and forecast data in a single pass over the rows
    rows = list(zip(plot_data['dates'], plot_data['headcount'], plot_data['is_forecast']))
    historical = [(d, h) for d, h, is_fc in rows if not is_fc]
    forecast = [(d, h) for d, h, is_fc in rows if is_fc]

    # Use an explicit Figure/Axes instead of pyplot's implicit "current
    # figure", and close it in `finally` so a failure while plotting or
    # saving cannot leak the figure.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        # Plot historical data
        if historical:
            hist_dates, hist_values = zip(*historical)
            ax.plot(hist_dates, hist_values, 'b-', label='Historical Headcount')
            ax.scatter(hist_dates, hist_values, color='blue', s=50)

        # Plot forecast data
        if forecast:
            fore_dates, fore_values = zip(*forecast)
            ax.plot(fore_dates, fore_values, 'r--', label='Forecasted Headcount')
            ax.scatter(fore_dates, fore_values, color='red', s=50)

        ax.set_title('Labour Headcount Forecast')
        ax.set_xlabel('Date')
        ax.set_ylabel('Headcount')
        ax.grid(True)
        # A legend with no labelled artists only produces a warning.
        if historical or forecast:
            ax.legend()
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()

        # Save the plot
        fig.savefig(chart_path)
    finally:
        plt.close(fig)
    return chart_path
# Function to process the PDF and display the desired output format
def forecast_labour(pdf_file):
    """Process an uploaded PDF and return a forecast summary plus a chart.

    Args:
        pdf_file: The uploaded file, given either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object exposing the path in
            a ``.name`` attribute. ``None`` means nothing was uploaded.

    Returns:
        A ``(result, chart_path)`` tuple. On success ``result`` is a dict
        with ``"Title"``, ``"Date"``, ``"Trade"``, ``"Weather"`` and
        ``"Forecast"`` keys and ``chart_path`` is the saved chart image. On
        any failure ``result`` is ``{"error": "Error processing the PDF: ..."}``
        and ``chart_path`` is ``None``; this function does not raise.
    """
    if pdf_file is None:
        return {"error": "Error processing the PDF: no file was uploaded"}, None

    try:
        # NOTE(review): the upload is assumed to arrive either as a plain
        # path or as a file-like wrapper with `.name`, depending on how the
        # Gradio File input is configured — confirm against the Gradio
        # version in use.
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = pdf_file
        else:
            pdf_path = pdf_file.name

        # Extract text from the uploaded PDF
        extracted_text = extract_text_from_pdf(pdf_path)
        # extract_text_from_pdf reports failure by returning a message with
        # this prefix rather than raising; surface it instead of treating the
        # message as document text.
        if extracted_text.startswith("Error extracting text from PDF:"):
            return {"error": f"Error processing the PDF: {extracted_text}"}, None
        logging.info("Extracted Text (first 500 chars): %s", extracted_text[:500])

        # Simulate historical data extraction (replace with real extraction logic)
        history = [{"ds": "2025-05-01", "y": 50}, {"ds": "2025-05-02", "y": 48}, {"ds": "2025-05-03", "y": 52}]

        # Apply the simple forecasting model
        predictions, plot_data = simple_forecast(history)

        # Create the forecast chart
        chart_path = create_forecast_chart(plot_data)

        # Format the result (placeholder metadata, not derived from the PDF)
        result = {
            "Title": "Labour Attendance Data",
            "Date": "May 2025",
            "Trade": "Electrician",
            "Weather": "Sunny",
            "Forecast": predictions
        }
        return result, chart_path
    except Exception as e:
        # UI boundary: report the problem in the JSON output instead of
        # letting the exception propagate to the caller.
        return {"error": f"Error processing the PDF: {e}"}, None
# Gradio interface to accept PDF and return a formatted output with chart
def gradio_interface():
    """Build the upload-a-PDF forecasting UI and launch it.

    The interface routes one file input through ``forecast_labour`` and shows
    its two return values as a JSON panel and an image. Returns ``None``.
    """
    pdf_input = gr.File(label="Upload PDF")
    json_output = gr.JSON(label="Forecast Result")
    chart_output = gr.Image(label="Forecast Chart")

    app = gr.Interface(
        fn=forecast_labour,
        inputs=[pdf_input],
        outputs=[json_output, chart_output],
        title="Labour Attendance Forecast",
        description="This app forecasts labour shortages based on attendance logs, trends, and other factors extracted from uploaded PDFs. It also displays a chart of historical and forecasted headcount.",
        theme="default",  # Changed to default theme
    )
    app.launch()
# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    gradio_interface()