"""Debug-oriented Streamlit app for a no-code text-classification workflow.

Provides a sidebar debug log, a resilient CSV-upload path that tries several
read strategies/encodings, pickle-based artifact persistence, and a simple
prediction pipeline (clean -> vectorize -> predict -> decode label).
"""

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle
import io
import traceback
import sys
from datetime import datetime

# Import ML libraries with error handling so a missing dependency is surfaced
# in the UI instead of crashing the app at import time.
try:
    from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
    from sklearn.preprocessing import LabelEncoder
    st.success("โœ… Sklearn imported successfully")
except ImportError as e:
    st.error(f"โŒ Sklearn import error: {e}")

# Import custom modules with error handling (same rationale as above).
try:
    from NoCodeTextClassifier.EDA import Informations, Visualizations
    from NoCodeTextClassifier.preprocessing import process, TextCleaner, Vectorization
    from NoCodeTextClassifier.models import Models
    st.success("โœ… NoCodeTextClassifier imported successfully")
except ImportError as e:
    st.error(f"โŒ NoCodeTextClassifier import error: {e}")
    st.info("Please ensure NoCodeTextClassifier package is installed")

# Set page config
st.set_page_config(page_title="Debug Text Classification", page_icon="๐Ÿ”", layout="wide")

# Debug section: the checkbox gates every debug_log() call below.
st.sidebar.header("๐Ÿ” Debug Information")
debug_mode = st.sidebar.checkbox("Enable Debug Mode", value=True)


def debug_log(message, level="INFO"):
    """Write a timestamped message to the sidebar when debug mode is on.

    Args:
        message: Text to display.
        level: Severity tag shown next to the timestamp (e.g. INFO/WARNING/ERROR).
    """
    if debug_mode:
        timestamp = datetime.now().strftime("%H:%M:%S")
        st.sidebar.write(f"**{timestamp} [{level}]:** {message}")


def detailed_error_info(e):
    """Return a dict describing an exception: type name, message, and traceback.

    Must be called from inside an ``except`` block so ``traceback.format_exc()``
    captures the active exception.
    """
    return {
        'type': type(e).__name__,
        'message': str(e),
        'traceback': traceback.format_exc(),
    }


def inspect_uploaded_file(uploaded_file):
    """Log basic properties of a Streamlit UploadedFile and peek at its bytes.

    Leaves the file pointer rewound to 0 on success. Returns a dict with
    name/type/size/file_id, or None if inspection raised.
    """
    debug_log("๐Ÿ” Inspecting uploaded file...")
    try:
        file_info = {
            'name': uploaded_file.name,
            'type': uploaded_file.type,
            'size': uploaded_file.size,
            # file_id is not present on every Streamlit version, hence getattr.
            'file_id': getattr(uploaded_file, 'file_id', 'Not available'),
        }
        debug_log(f"File name: {file_info['name']}")
        debug_log(f"File type: {file_info['type']}")
        debug_log(f"File size: {file_info['size']} bytes")
        debug_log(f"File ID: {file_info['file_id']}")

        # Try to read first few bytes to confirm the buffer is readable.
        uploaded_file.seek(0)
        first_bytes = uploaded_file.read(100)
        debug_log(f"First 100 bytes type: {type(first_bytes)}")
        debug_log(f"First 100 bytes preview: {first_bytes[:50]}...")

        # Reset file pointer so subsequent readers start from the beginning.
        uploaded_file.seek(0)
        return file_info
    except Exception as e:
        error_info = detailed_error_info(e)
        debug_log(f"โŒ Error inspecting file: {error_info['type']}: {error_info['message']}", "ERROR")
        st.sidebar.error(f"File inspection error: {error_info['message']}")
        return None


def safe_read_csv_debug(uploaded_file, encoding_options=('utf-8', 'latin1', 'iso-8859-1', 'cp1252')):
    """Read an uploaded CSV, trying several read strategies and encodings.

    Tries each strategy (direct pandas read, BytesIO round-trip, StringIO
    decode) against each encoding in order, logging every attempt. Returns the
    first successfully parsed DataFrame, or None if everything failed.

    Note: the default is a tuple (not a list) to avoid the mutable-default
    pitfall; callers may still pass any iterable of encoding names.
    """
    debug_log("๐Ÿ”„ Starting CSV read process...")

    # Inspect file first; bail out if the buffer itself is unreadable.
    file_info = inspect_uploaded_file(uploaded_file)
    if file_info is None:
        return None

    # Each reader receives a rewound file object and an encoding name.
    def _read_direct(f, encoding):
        return pd.read_csv(f, encoding=encoding)

    def _read_bytesio(f, encoding):
        content = f.read()
        return pd.read_csv(io.BytesIO(content), encoding=encoding)

    def _read_stringio(f, encoding):
        content = f.read()
        if isinstance(content, bytes):
            content = content.decode(encoding)
        return pd.read_csv(io.StringIO(content))

    methods = [
        ("Direct pandas read", _read_direct),
        ("BytesIO method", _read_bytesio),
        ("StringIO method", _read_stringio),
    ]

    for method_name, read_func in methods:
        debug_log(f"๐Ÿ”„ Trying method: {method_name}")
        for encoding in encoding_options:
            try:
                debug_log(f" - Attempting encoding: {encoding}")
                uploaded_file.seek(0)
                df = read_func(uploaded_file, encoding)
                debug_log(f"โœ… Success with {method_name} + {encoding}")
                debug_log(f"DataFrame shape: {df.shape}")
                debug_log(f"Columns: {list(df.columns)}")
                st.success(f"File loaded successfully using {method_name} with {encoding} encoding")
                return df
            except UnicodeDecodeError as e:
                debug_log(f" - Unicode error with {encoding}: {str(e)}", "WARNING")
                continue
            except Exception as e:
                error_info = detailed_error_info(e)
                debug_log(f" - Error with {method_name} + {encoding}: {error_info['type']}: {error_info['message']}", "ERROR")
                # Surface permission/403 failures loudly; they indicate an
                # environment problem rather than a bad CSV.
                if "403" in str(e) or "permission" in str(e).lower():
                    st.error("๐Ÿšจ PERMISSION ERROR DETECTED!")
                    st.error(f"Method: {method_name}, Encoding: {encoding}")
                    st.error(f"Error type: {error_info['type']}")
                    st.error(f"Error message: {error_info['message']}")
                    st.code(error_info['traceback'])
                continue

    debug_log("โŒ All reading methods failed", "ERROR")
    st.error("All CSV reading methods failed. Check debug log for details.")
    return None


# Utility functions with debugging
def save_artifacts(obj, folder_name, file_name):
    """Pickle ``obj`` to ``folder_name/file_name``; return True on success."""
    debug_log(f"๐Ÿ’พ Saving {file_name} to {folder_name}")
    try:
        os.makedirs(folder_name, exist_ok=True)
        full_path = os.path.join(folder_name, file_name)
        with open(full_path, 'wb') as f:
            pickle.dump(obj, f)
        debug_log(f"โœ… Successfully saved {file_name}")
        return True
    except Exception as e:
        error_info = detailed_error_info(e)
        debug_log(f"โŒ Error saving {file_name}: {error_info['message']}", "ERROR")
        st.error(f"Save error: {error_info['message']}")
        return False


def load_artifacts(folder_name, file_name):
    """Unpickle and return ``folder_name/file_name``, or None on any failure.

    NOTE(review): pickle.load is only safe because these artifacts are written
    locally by save_artifacts; never point this at untrusted files.
    """
    debug_log(f"๐Ÿ“‚ Loading {file_name} from {folder_name}")
    try:
        full_path = os.path.join(folder_name, file_name)
        if not os.path.exists(full_path):
            debug_log(f"โŒ File not found: {full_path}", "ERROR")
            return None
        with open(full_path, 'rb') as f:
            obj = pickle.load(f)
        debug_log(f"โœ… Successfully loaded {file_name}")
        return obj
    except Exception as e:
        error_info = detailed_error_info(e)
        debug_log(f"โŒ Error loading {file_name}: {error_info['message']}", "ERROR")
        st.error(f"Load error: {error_info['message']}")
        return None


def load_model(model_name):
    """Load a pickled model from the ``models`` directory (None on failure)."""
    debug_log(f"๐Ÿค– Loading model: {model_name}")
    return load_artifacts("models", model_name)


def predict_text(model_name, text, vectorizer_type="tfidf"):
    """Run the full prediction pipeline for a single text.

    Loads the model, vectorizer, and label encoder; cleans and vectorizes the
    text; returns ``(predicted_label, prediction_proba)``. Either element (or
    both) is None when a step fails; probabilities are None when the model
    does not support ``predict_proba``.
    """
    debug_log(f"๐Ÿ”ฎ Starting prediction with {model_name}")
    try:
        # Load components; each loader reports its own failure, so we can
        # simply bail out on None.
        model = load_model(model_name)
        if model is None:
            return None, None

        vectorizer_file = f"{vectorizer_type}_vectorizer.pkl"
        vectorizer = load_artifacts("artifacts", vectorizer_file)
        if vectorizer is None:
            return None, None

        encoder = load_artifacts("artifacts", "encoder.pkl")
        if encoder is None:
            return None, None

        debug_log("๐Ÿงน Cleaning text...")
        text_cleaner = TextCleaner()
        clean_text = text_cleaner.clean_text(text)
        debug_log(f"Cleaned text preview: {clean_text[:50]}...")

        debug_log("๐Ÿ”ข Vectorizing text...")
        text_vector = vectorizer.transform([clean_text])
        debug_log(f"Vector shape: {text_vector.shape}")

        debug_log("๐ŸŽฏ Making prediction...")
        prediction = model.predict(text_vector)

        prediction_proba = None
        if hasattr(model, 'predict_proba'):
            try:
                prediction_proba = model.predict_proba(text_vector)[0]
                debug_log(f"Prediction probabilities: {prediction_proba}")
            except Exception:  # narrowed from bare except: don't swallow SystemExit/KeyboardInterrupt
                debug_log("No prediction probabilities available", "WARNING")

        predicted_label = encoder.inverse_transform(prediction)[0]
        debug_log(f"โœ… Prediction complete: {predicted_label}")
        return predicted_label, prediction_proba
    except Exception as e:
        error_info = detailed_error_info(e)
        debug_log(f"โŒ Prediction error: {error_info['message']}", "ERROR")
        st.error(f"Prediction error: {error_info['message']}")
        if debug_mode:
            st.code(error_info['traceback'])
        return None, None


# Main App
st.title('๐Ÿ” Debug Text Classification App')
st.write('Debug version to identify and fix issues')

# Environment info
if debug_mode:
    st.sidebar.subheader("๐Ÿ–ฅ๏ธ Environment Info")
    st.sidebar.write(f"Python version: {sys.version}")
    st.sidebar.write(f"Streamlit version: {st.__version__}")
    st.sidebar.write(f"Pandas version: {pd.__version__}")
    st.sidebar.write(f"Current working directory: {os.getcwd()}")

    # Check directory permissions by round-tripping a throwaway file.
    try:
        test_dir = "test_permissions"
        os.makedirs(test_dir, exist_ok=True)
        test_file = os.path.join(test_dir, "test.txt")
        with open(test_file, 'w') as f:
            f.write("test")
        os.remove(test_file)
        os.rmdir(test_dir)
        st.sidebar.success("โœ… File system permissions OK")
    except Exception as e:
        st.sidebar.error(f"โŒ File system permission issue: {e}")

# Sidebar navigation
section = st.sidebar.radio("Choose Section", ["File Upload Debug", "Data Analysis", "Train Model", "Predictions"])

# Session state initialization
if 'vectorizer_type' not in st.session_state:
    st.session_state.vectorizer_type = "tfidf"
if 'train_df' not in st.session_state:
    st.session_state.train_df = None

# File Upload Debug Section
if section == "File Upload Debug":
    st.subheader("๐Ÿ” File Upload Debugging")
    st.info("This section helps debug file upload issues. Upload your file and see detailed error information.")

    train_data = st.file_uploader("Upload training data (DEBUG MODE)", type=["csv"], key="debug_upload")

    if train_data is not None:
        st.write("### File Upload Detected!")

        # Show raw file info
        st.write("**Raw File Information:**")
        st.json({
            "name": train_data.name,
            "type": train_data.type if hasattr(train_data, 'type') else "Unknown",
            "size": train_data.size if hasattr(train_data, 'size') else "Unknown",
        })

        # Try to read the file
        st.write("### Attempting to Read File...")
        with st.spinner("Reading file with debug mode..."):
            df = safe_read_csv_debug(train_data)

        if df is not None:
            st.success("๐ŸŽ‰ File successfully loaded!")
            st.write("**Data Preview:**")
            st.dataframe(df.head())
            st.write(f"**Shape:** {df.shape}")
            st.write(f"**Columns:** {list(df.columns)}")
            st.write("**Data Types:**")
            st.write(df.dtypes)

            # Store in session state so other sections can use it.
            st.session_state.train_df = df
        else:
            st.error("โŒ Failed to load file. Check the debug log for details.")

            # Additional troubleshooting
            st.write("### ๐Ÿ”ง Troubleshooting Steps:")
            st.write("1. Check if your file is a valid CSV")
            st.write("2. Try saving your CSV with different encoding (UTF-8 recommended)")
            st.write("3. Check if file size is within limits")
            st.write("4. Ensure no special characters in filename")
            st.write("5. Try uploading from a different location")

# Other sections (simplified for debugging)
elif section == "Data Analysis":
    st.subheader("๐Ÿ“Š Data Analysis")
    if st.session_state.train_df is not None:
        df = st.session_state.train_df
        st.write("Using loaded data from debug session:")
        st.dataframe(df.head())

        # Basic analysis without custom modules if they fail
        st.write(f"**Shape:** {df.shape}")
        st.write(f"**Columns:** {list(df.columns)}")
        st.write(f"**Missing values:**")
        st.write(df.isnull().sum())
    else:
        st.warning("No data loaded. Please use 'File Upload Debug' section first.")

elif section == "Train Model":
    st.subheader("๐Ÿค– Train Model")
    st.info("Use this section after successfully loading data in debug mode.")
    if st.session_state.train_df is not None:
        st.success("Data available for training!")
        # Add your training logic here
    else:
        st.warning("No data loaded. Please use 'File Upload Debug' section first.")

elif section == "Predictions":
    st.subheader("๐Ÿ”ฎ Predictions")
    st.info("Use this section after training a model.")

    # Check for trained models
    if os.path.exists("models"):
        models = [f for f in os.listdir("models") if f.endswith('.pkl')]
        if models:
            st.write(f"Available models: {models}")
        else:
            st.info("No trained models found.")
    else:
        st.info("Models directory not found.")

# Debug summary
if debug_mode:
    st.sidebar.markdown("---")
    st.sidebar.subheader("๐Ÿ“‹ Debug Summary")
    if st.session_state.train_df is not None:
        st.sidebar.success("โœ… Data loaded successfully")
    else:
        st.sidebar.warning("โš ๏ธ No data loaded")