# Gantt_chart / app.py
# Repository page header: last commit "Update app.py" (6dc1a2d, verified) by Aizaz96.
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import networkx as nx
import tempfile
import os
import matplotlib.pyplot as plt
from docx import Document
import pdfplumber
# Page-level configuration; kept ahead of every other Streamlit call in this script.
st.set_page_config(page_title="Project Schedule Analyzer", layout="wide")
# The leading emoji was mis-encoded in the source (UTF-8 bytes shown as "πŸ“Š"); restored to the bar-chart glyph.
st.title("📊 Project Gantt & Critical Path Analyzer")
# Utility to read different file types
def read_schedule(file):
    """Load an uploaded schedule file into a DataFrame.

    The parser is chosen from the extension of ``file.name``
    (case-insensitive):

    * ``csv``  - parsed directly with ``pandas.read_csv``.
    * ``pdf``  - the text of every page is extracted with pdfplumber and the
      combined text is parsed as CSV.
    * ``docx`` - the rows of every table in the document are collected; the
      first collected row supplies the column names.

    Args:
        file: A file-like object exposing a ``name`` attribute.

    Returns:
        A ``pandas.DataFrame``, or ``None`` when the extension is not
        supported or the file contains no extractable text / table rows.
    """
    # Local import keeps this function independent of the module's import block.
    from io import StringIO

    ext = file.name.rsplit('.', 1)[-1].lower()

    if ext == 'csv':
        return pd.read_csv(file)

    if ext == 'pdf':
        with pdfplumber.open(file) as pdf:
            # extract_text() can come back empty/None for a page with no
            # extractable text, so fall back to '' before joining.
            page_texts = [page.extract_text() or '' for page in pdf.pages]
        text = '\n'.join(page_texts)
        if not text.strip():
            return None
        # pd.compat.StringIO is not available in current pandas; io.StringIO
        # is the supported way to hand in-memory text to read_csv.
        return pd.read_csv(StringIO(text))

    if ext == 'docx':
        doc = Document(file)
        rows = [
            [cell.text.strip() for cell in row.cells]
            for table in doc.tables
            for row in table.rows
        ]
        if not rows:
            # No tables at all: nothing to build a header from.
            return None
        header, *body = rows
        return pd.DataFrame(body, columns=header)

    return None
# Attempt to normalize column names
def normalize_columns(df):
    """Rename recognised column headers to the app's canonical names.

    Each header is compared case-insensitively, ignoring surrounding
    whitespace, against a table of known aliases. Headers with no alias are
    kept, with surrounding whitespace removed. The frame is renamed in place
    and the same object is returned.
    """
    aliases_by_name = {
        'Task': ('task name', 'activity', 'activity name'),
        'Task ID': ('task id', 'id'),
        'Start Date': ('start date', 'start'),
        'End Date': ('end date', 'end'),
        'Duration (days)': ('duration', 'duration (days)'),
        'Predecessor': ('predecessor', 'predecessors'),
    }
    # Invert the grouping into a flat alias -> canonical lookup.
    lookup = {}
    for canonical, aliases in aliases_by_name.items():
        for alias in aliases:
            lookup[alias] = canonical

    renamed = []
    for header in df.columns:
        key = header.lower().strip()
        if key in lookup:
            renamed.append(lookup[key])
        else:
            renamed.append(header.strip())

    df.columns = renamed
    return df
def _normalize_task_id(value):
    """Return a task identifier as a trimmed string, mapping "2.0" to "2".

    pandas reads an integer column that contains blanks as float, so the same
    ID can appear as "2" in one column and "2.0" in another.
    """
    text = str(value).strip()
    whole, dot, fraction = text.partition(".")
    if dot and whole.isdigit() and fraction and set(fraction) == {"0"}:
        return whole
    return text


def _build_dependency_graph(schedule):
    """Build a DiGraph with one node per task and an edge predecessor -> task."""
    graph = nx.DiGraph()
    records = zip(
        schedule["Task ID"],
        schedule["Task"],
        schedule["Duration (days)"],
        schedule["Predecessor"],
    )
    for raw_id, task, duration, raw_preds in records:
        task_id = _normalize_task_id(raw_id)
        graph.add_node(task_id, label=task, duration=duration)
        if pd.isna(raw_preds):
            continue
        for token in str(raw_preds).split(","):
            pred = _normalize_task_id(token)
            if pred:  # ignore empty tokens such as the gap in "1,,2"
                graph.add_edge(pred, task_id)
    return graph


def _longest_duration_path(graph):
    """Return the chain of nodes with the greatest summed ``duration``.

    Durations are read from node attributes (missing ones count as 0).
    Raises ``nx.NetworkXUnfeasible`` when the graph contains a cycle.
    """
    finish = {}     # node -> total duration of the heaviest chain ending there
    came_from = {}  # node -> previous node on that chain (None at a chain start)
    for node in nx.topological_sort(graph):
        prior = max(graph.predecessors(node), key=finish.__getitem__, default=None)
        came_from[node] = prior
        start = finish[prior] if prior is not None else 0
        finish[node] = start + graph.nodes[node].get("duration", 0)

    if not finish:
        return []

    node = max(finish, key=finish.__getitem__)
    path = []
    while node is not None:
        path.append(node)
        node = came_from[node]
    path.reverse()
    return path


def project_management():
    """Render the upload / schedule table / Gantt chart / critical-path page.

    Reads the uploaded file with ``read_schedule``, canonicalises headers with
    ``normalize_columns``, validates the required columns and dates, then
    shows the table, a Plotly timeline and the critical path (the dependency
    chain with the largest total duration) drawn on the dependency graph.
    Problems are reported through ``st.error`` / ``st.warning``; the function
    returns ``None`` in every case.
    """
    st.header("📅 Upload Project Schedule File")
    file = st.file_uploader("Choose a file (CSV, PDF, DOCX)", type=["csv", "pdf", "docx"])
    if not file:
        return

    try:
        df = read_schedule(file)
    except Exception as exc:  # UI boundary: report parser failures instead of crashing the page
        st.error(f"❌ Failed to parse the file: {exc}")
        return
    if df is None:
        st.error("❌ Unsupported file format or failed to parse the file.")
        return

    df = normalize_columns(df)
    required_cols = {"Task", "Task ID", "Start Date", "Duration (days)", "End Date", "Predecessor"}
    if not required_cols.issubset(df.columns):
        st.warning(f"⚠️ Detected columns: {list(df.columns)}")
        # sorted() keeps the message stable; set iteration order is arbitrary.
        st.error("Required columns are missing. Please make sure your file includes: "
                 + ", ".join(sorted(required_cols)))
        return

    # Convert date columns
    df["Start Date"] = pd.to_datetime(df["Start Date"], errors='coerce')
    df["End Date"] = pd.to_datetime(df["End Date"], errors='coerce')
    if df[["Start Date", "End Date"]].isnull().any().any():
        st.error("❌ Some date values could not be parsed. Please check date formats.")
        return

    # NOTE(review): the original glyph here was unrecoverable from the
    # mis-encoded source text; the memo emoji is a best guess - confirm.
    st.subheader("📝 Project Schedule")
    st.dataframe(df)

    st.subheader("📈 Gantt Chart")
    fig = px.timeline(df, x_start="Start Date", x_end="End Date", y="Task", color="Task")
    fig.update_yaxes(categoryorder="total ascending")
    st.plotly_chart(fig, use_container_width=True)

    st.subheader("🔺 Critical Path")
    # Durations drive the critical path, so they must be numeric (DOCX tables yield strings).
    durations = pd.to_numeric(df["Duration (days)"], errors="coerce")
    if durations.isna().any():
        st.error("❌ Some duration values are not numeric. Please check the 'Duration (days)' column.")
        return

    G = _build_dependency_graph(df.assign(**{"Duration (days)": durations}))

    # Nodes created only through a Predecessor reference carry no label.
    unknown = sorted(n for n, data in G.nodes(data=True) if "label" not in data)
    if unknown:
        st.warning(f"⚠️ Predecessor IDs not found in the Task ID column: {', '.join(unknown)}")
    labels = {n: data.get("label", n) for n, data in G.nodes(data=True)}

    try:
        path = _longest_duration_path(G)
    except nx.NetworkXUnfeasible:
        st.error("❌ Failed to compute critical path: the predecessor links form a cycle.")
        return
    st.success(f"Critical Path: {' → '.join(str(labels[n]) for n in path)}")

    critical_edges = set(zip(path, path[1:]))
    edge_colors = ['red' if edge in critical_edges else 'black' for edge in G.edges()]

    fig_cp, ax = plt.subplots(figsize=(10, 6))
    try:
        # Fixed seed keeps the layout stable between reruns of the script.
        pos = nx.spring_layout(G, seed=0)
        nx.draw(G, pos, with_labels=True, labels=labels,
                node_color='skyblue', node_size=2000,
                edge_color=edge_colors, width=2, font_size=10, ax=ax)
        st.pyplot(fig_cp)
    except Exception as e:  # UI boundary: a drawing failure should not take the page down
        st.error(f"❌ Failed to draw the dependency graph: {e}")
    finally:
        # Release the figure; pyplot keeps every unclosed figure alive.
        plt.close(fig_cp)
# Entry point: build the upload / Gantt / critical-path page.
project_management()