Spaces:
Build error
Build error
| # app.py | |
| import streamlit as st | |
| from st_aggrid import AgGrid, GridOptionsBuilder | |
| import pandas as pd | |
| import os, json | |
| from datetime import datetime | |
| from data_ingestion.ingest_data import read_document | |
| from data_ingestion.preprocess_data import preprocess_text | |
| from llm_integration.task_extraction import extract_tasks_from_text | |
| from llm_integration.task_comparison import compare_task_data | |
| from database.mongo_integration import * | |
| from notion_client import Client | |
| from pymongo import MongoClient | |
| from datetime import datetime | |
| # from database.mongo_integration import save_tasks_to_mongo | |
# Make sure the output directory exists before anything tries to write to it.
os.makedirs("data/output", exist_ok=True)
import pandas as pd

# SECURITY NOTE(review): the literal fallbacks below are credentials committed
# to source control. They are kept only so existing deployments keep working
# when the environment variables are unset. Rotate them, configure MONGO_URI
# and NOTION_TOKEN as deployment secrets, then delete the fallbacks.
mongo_client = MongoClient(
    os.environ.get(
        "MONGO_URI",
        "mongodb+srv://shahid:Protondev%40456@cluster0.ruurd.mongodb.net/",
    )
)
db = mongo_client["task_management"]
employee_project_collection = db["employee_project"]

notion = Client(
    auth=os.environ.get(
        "NOTION_TOKEN",
        "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",
    )
)
# Notion identifiers: the parent page and the task database that
# push_to_notion() and fetch_recent_tasks_from_notion() operate on.
parent_page_id = "148b2f92b9948099a854e8b21a0640a3"
notion_database_id = "14db2f92-b994-81fb-9132-f4e4cb46ac13"
def fetch_latest_task_entry():
    """
    Fetch the most recent entry from the ``employee_project`` collection.

    "Most recent" is the document with the greatest ``created_at`` value.

    Returns:
        dict: The latest task entry as a dictionary.

    Raises:
        ValueError: If the collection contains no entries.
    """
    # Imported locally so the sort constant does not depend on whichever names
    # the star-import from database.mongo_integration happens to re-export.
    from pymongo import DESCENDING

    latest_entry = employee_project_collection.find_one(
        sort=[("created_at", DESCENDING)]
    )
    if not latest_entry:
        raise ValueError("No entries found in MongoDB.")
    return latest_entry
def push_to_notion(latest_entry):
    """
    Replace the contents of the Notion task database with the latest tasks.

    First archives every page currently in the Notion database, then creates
    one page per task found under ``consolidated_final_task``. Failures in
    either step are reported with ``st.error`` and are not raised; a failed
    archive step does not prevent the push step from running.

    Args:
        latest_entry (dict): The most recent task data from MongoDB. Read keys:
            ``consolidated_final_task`` (mapping of project name to a mapping
            of task id to task details) and ``created_at`` (an object with
            ``isoformat()``; the current UTC time is used when it is missing).
    """
    tasks = latest_entry.get("consolidated_final_task", {})
    created_at = latest_entry.get("created_at", None)
    # All tasks in one push share the same timestamp, so compute it once.
    created_at_iso = (
        created_at.isoformat() if created_at else datetime.utcnow().isoformat()
    )

    # Step 1: Archive existing tasks in Notion database
    with st.spinner("Archiving existing tasks in Notion..."):
        try:
            # A single query returns only one page of results, so follow the
            # cursor until the whole database has been listed. Ids are
            # collected before archiving so the listing is not disturbed by
            # pages disappearing mid-iteration.
            page_ids = []
            query_args = {"database_id": notion_database_id}
            while True:
                batch = notion.databases.query(**query_args)
                page_ids.extend(page["id"] for page in batch["results"])
                next_cursor = batch.get("next_cursor")
                if not batch.get("has_more") or not next_cursor:
                    break
                query_args["start_cursor"] = next_cursor

            for page_id in page_ids:
                notion.pages.update(page_id=page_id, archived=True)
            st.info("Old tasks archived in Notion successfully.")
        except Exception as e:
            st.error(f"Failed to archive tasks in Notion: {e}")

    # Step 2: Push new tasks to Notion
    with st.spinner("Pushing new tasks to Notion..."):
        try:
            for project_name, task_list in tasks.items():
                for task_id, task_details in task_list.items():
                    # Map MongoDB fields to Notion properties
                    notion_task = {
                        "parent": {"database_id": notion_database_id},
                        "properties": {
                            "Project Name": {"title": [{"type": "text", "text": {"content": project_name}}]},
                            "Task ID": {"rich_text": [{"type": "text", "text": {"content": task_id}}]},
                            "Description": {"rich_text": [{"type": "text", "text": {"content": task_details.get("description", "")}}]},
                            "Priority": {"select": {"name": task_details.get("priority", "low")}},
                            "Assigned To": {"rich_text": [{"type": "text", "text": {"content": task_details.get("assigned_to", "")}}]},
                            "Current Status": {"select": {"name": task_details.get("current_status", "pending")}},
                            "Created At": {"date": {"start": created_at_iso}},
                        },
                    }
                    response = notion.pages.create(**notion_task)
                    print(f"Task pushed to Notion: {response['id']}")
            st.success("New tasks pushed to Notion successfully!")
        except Exception as e:
            st.error(f"Failed to push tasks to Notion: {e}")
def json_to_dataframe(json_data):
    """
    Converts a nested JSON structure into a user-friendly Pandas DataFrame for display.

    Missing task fields fall back to the same defaults ``push_to_notion`` uses
    (empty description/assignee, ``"low"`` priority, ``"pending"`` status)
    instead of raising ``KeyError``. The returned frame always has the same six
    columns, even when there are no tasks.

    Args:
        json_data (dict | None): Mapping of project name to a mapping of task
            id to task details. ``None`` or an empty mapping gives an empty
            DataFrame.

    Returns:
        pd.DataFrame: One row per task with columns Project, Task Name,
        Description, Priority, Assigned To and Status.
    """
    columns = ["Project", "Task Name", "Description", "Priority", "Assigned To", "Status"]
    rows = []
    for project_name, tasks in (json_data or {}).items():
        for task_id, task_details in (tasks or {}).items():
            details = task_details or {}
            rows.append({
                "Project": project_name,
                "Task Name": task_id,
                "Description": details.get("description", ""),
                "Priority": details.get("priority", "low"),
                "Assigned To": details.get("assigned_to", ""),
                "Status": details.get("current_status", "pending"),
            })
    # Passing columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(rows, columns=columns)
def dataframe_to_json(df):
    """
    Rebuild the nested project -> task -> details mapping from a task table.

    Rows are processed in order; a later row with the same project and task
    name overwrites the earlier one.

    Args:
        df (pd.DataFrame): Table with the columns Project, Task Name,
            Description, Priority, Assigned To and Status.

    Returns:
        dict: ``{project: {task_name: {"description", "priority",
        "assigned_to", "current_status"}}}``.
    """
    nested = {}
    for _, record in df.iterrows():
        project = record['Project']
        task = record['Task Name']
        # setdefault creates the per-project bucket on first sight.
        nested.setdefault(project, {})[task] = {
            "description": record['Description'],
            "priority": record['Priority'],
            "assigned_to": record['Assigned To'],
            "current_status": record['Status'],
        }
    return nested
# Helpers for reading Notion page properties without assuming they are filled in.
def _notion_text(prop, kind):
    """Return the concatenated text of a Notion ``title``/``rich_text`` property, or ""."""
    segments = (prop or {}).get(kind) or []
    parts = []
    for segment in segments:
        content = (segment.get("text") or {}).get("content")
        # Fall back to plain_text for segments that carry no "text" payload.
        parts.append(content if content is not None else segment.get("plain_text", ""))
    return "".join(parts)


def _notion_select_name(prop):
    """Return the option name of a Notion ``select`` property, or "" when unset."""
    return ((prop or {}).get("select") or {}).get("name", "")


def _notion_date_start(prop):
    """Return the ``start`` value of a Notion ``date`` property, or None when unset."""
    return ((prop or {}).get("date") or {}).get("start")


# Function to fetch the most recent tasks from Notion
def fetch_recent_tasks_from_notion():
    """
    Fetch the most recent tasks from the Notion database and return them as a list of dicts.

    Queries up to 20 pages sorted by "Created At" descending. Empty or unset
    properties yield "" (or None for "Created At") instead of aborting the
    whole fetch, so one incomplete page no longer hides every other task.

    Returns:
        list[dict]: One dict per page with the keys Project Name, Task ID,
        Description, Priority, Assigned To, Current Status and Created At.
        An empty list is returned when the query fails; the error is printed.
    """
    try:
        query_response = notion.databases.query(
            database_id=notion_database_id,
            sorts=[{"property": "Created At", "direction": "descending"}],
            page_size=20,  # number of most recent tasks to show; adjust as needed
        )
        tasks = []
        for result in query_response.get("results", []):
            props = result.get("properties", {})
            tasks.append({
                "Project Name": _notion_text(props.get("Project Name"), "title"),
                "Task ID": _notion_text(props.get("Task ID"), "rich_text"),
                "Description": _notion_text(props.get("Description"), "rich_text"),
                "Priority": _notion_select_name(props.get("Priority")),
                "Assigned To": _notion_text(props.get("Assigned To"), "rich_text"),
                "Current Status": _notion_select_name(props.get("Current Status")),
                "Created At": _notion_date_start(props.get("Created At")),
            })
        return tasks
    except Exception as e:
        # Best-effort by design: the dashboard shows "no tasks" rather than crash.
        print(f"Error fetching tasks from Notion: {e}")
        return []
# Render the latest Notion tasks on the dashboard tab
def display_recent_tasks_on_dashboard():
    """
    Show the tasks returned by ``fetch_recent_tasks_from_notion`` as a table.

    Writes a short "no tasks" message instead when the fetch returns nothing.
    """
    recent = fetch_recent_tasks_from_notion()
    if not recent:
        st.write("No tasks found in the Notion database.")
        return

    # Build the table first, then render the heading and the table together.
    table = pd.DataFrame(recent)
    st.subheader("Most Recent Tasks from Notion")
    st.dataframe(table)
# Page-level Streamlit configuration.
# NOTE(review): the page_icon value looks like a mis-encoded emoji; it is kept
# exactly as found because the intended character cannot be recovered here.
st.set_page_config(page_title="Task Management", page_icon="π", layout="wide")

# Session-state slots used to pass intermediate data between reruns and tabs.
for _state_key in ("processed_tasks", "edited_df", "comparison_results"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None

tab1, tab2, tab3 = st.tabs(["Dashboard", "Upload and Process", "Review Updated Tasks"])

# Index of the tab considered active; written by switch_tab().
if "active_tab" not in st.session_state:
    st.session_state["active_tab"] = 0
# Tab switching helper
def switch_tab(tab_index):
    """Store ``tab_index`` under ``active_tab`` in the Streamlit session state."""
    st.session_state["active_tab"] = tab_index
# -------------------------------
# Tab 1: Dashboard
# -------------------------------
with tab1, st.container():
    st.title("π Task Management Dashboard")
    # Recent tasks are read live from Notion on every rerun.
    display_recent_tasks_on_dashboard()
    # The "Quick Actions" buttons (which called switch_tab(1) / switch_tab(2))
    # are currently disabled.
# -------------------------------
# Tab 2: Upload and Process
# -------------------------------
with tab2:
    with st.container():
        st.title("📤 Upload and Process Tasks")
        uploaded_file = st.file_uploader("Upload a .docx file", type=["docx"])
        if uploaded_file is not None:
            # The extracted table is cached in session state under "df". Drop
            # it when a different file is uploaded so stale tasks from the
            # previous document are never shown for the new one.
            file_key = (uploaded_file.name, uploaded_file.size)
            if st.session_state.get("df_source") != file_key:
                if "df" in st.session_state:
                    del st.session_state["df"]
                st.session_state["df_source"] = file_key

            with st.spinner("Processing uploaded file..."):
                # Step 1: Extract cleaned text
                raw_data = read_document(uploaded_file)
                cleaned_text = preprocess_text(raw_data)
                cleaned_text = "\n".join(
                    f"{entry['author']}: {entry['text']}" for entry in cleaned_text
                )
                # Step 2: Extract tasks (only when no cached table exists, so
                # widget-triggered reruns do not repeat the extraction)
                if "df" not in st.session_state:
                    extracted_tasks = extract_tasks_from_text(cleaned_text)
                    st.subheader("Processed Tasks (DataFrame View)")
                    st.session_state.df = json_to_dataframe(extracted_tasks)
                # Display the DataFrame for editing
                edited_df = st.data_editor(st.session_state.df)
                st.session_state.edited_df = edited_df
                edited_extracted_tasks_json = dataframe_to_json(edited_df)
                st.session_state.processed_tasks = edited_extracted_tasks_json
                st.success("Tasks extracted successfully!")

            # Step 3: Push extracted tasks to MongoDB
            if st.button("Save tasks & Compare"):
                saved = False
                with st.spinner("Saving tasks to MongoDB..."):
                    try:
                        insert_weekly_task_data(edited_extracted_tasks_json)
                    except Exception as e:
                        st.error(f"Failed to save tasks to the database: {e}")
                    else:
                        saved = True
                        st.success("Tasks successfully saved to the database!")

                # Only continue when the save succeeded: comparing after a
                # failed save would use data that does not include this upload,
                # and keeping "df" lets the user retry without re-extracting.
                if saved:
                    if "df" in st.session_state:
                        del st.session_state["df"]
                        st.info("Temporary data removed from session state.")

                    # Step 4: Run comparison
                    with st.spinner("Running task comparison..."):
                        st.write("Running task comparison...")
                        recent_entries = fetch_recent_two_entries()
                        if len(recent_entries) >= 2:
                            try:
                                latest_entry = fetch_latest_task_entry()
                            except ValueError:
                                # No finalized entry exists yet; compare
                                # against an empty baseline instead of
                                # crashing the page.
                                latest_entry = {}
                            old_tasks = latest_entry.get("consolidated_final_task", {})
                            new_tasks = recent_entries[0]["tasks"]
                            comparison_results = compare_task_data(old_tasks, new_tasks)
                            st.session_state.comparison_results = comparison_results
                            # The review tab caches its table under
                            # "compared_df"; clear it so the new results are
                            # what gets shown there.
                            if "compared_df" in st.session_state:
                                del st.session_state["compared_df"]
                            st.success("Task comparison completed! Please move to Review section")
                        else:
                            st.warning("Not enough data to run comparison.")
# -------------------------------
# Tab 3: Review and Approve Tasks
# -------------------------------
with tab3:
    # NOTE(review): the leading character of the title looks like a
    # mis-encoded emoji; kept as found because the original is not recoverable.
    st.title("π Review and Approve Tasks")
    if st.session_state.comparison_results is None:
        st.warning("No comparison results available. Please upload and process tasks first.")
    elif not st.session_state.comparison_results:
        # A comparison ran but produced nothing; say so instead of rendering
        # an empty tab.
        st.info("The comparison returned no tasks to review.")
    else:
        # Build the review table once and cache it in session state.
        if "compared_df" not in st.session_state:
            st.session_state.compared_df = json_to_dataframe(st.session_state.comparison_results)

        # Inline editing of tasks
        st.subheader("Edit Tasks")
        final_edited_df = st.data_editor(st.session_state.compared_df)
        st.session_state.final_edited_df = final_edited_df
        final_extracted_tasks_json = dataframe_to_json(final_edited_df)

        # Approval and finalization
        if st.button("Approve and Finalize Tasks"):
            with st.spinner("Finalizing tasks..."):
                try:
                    # Local name chosen so the module-level `db` handle is not
                    # rebound by this script section.
                    tasks_db = get_database()
                    updated_collection = tasks_db["employee_project"]
                    document = {
                        "consolidated_final_task": final_extracted_tasks_json,
                        # NOTE(review): naive local time; confirm it matches how
                        # other writers populate created_at, since
                        # fetch_latest_task_entry() sorts on this field.
                        "created_at": datetime.now(),
                    }
                    updated_collection.insert_one(document)
                    st.success("Finalized tasks saved successfully!")
                except Exception as e:
                    st.error(f"Failed to save tasks: {e}")

        if st.button("Push to Notion Dashboard"):
            with st.spinner("Pushing to Notion..."):
                try:
                    latest_entry = fetch_latest_task_entry()
                except ValueError as e:
                    # Raised when no finalized entry exists yet.
                    st.error(f"Nothing to push to Notion: {e}")
                else:
                    push_to_notion(latest_entry)
                    st.success("Notion Dashboard has been updated")