Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| import os | |
| import pickle | |
| import io | |
| import traceback | |
| import sys | |
| from datetime import datetime | |
# Import ML libraries with error handling: report success/failure in the UI
# instead of letting the whole app crash on a missing dependency.
try:
    from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
    from sklearn.preprocessing import LabelEncoder
    st.success("โ Sklearn imported successfully")
except ImportError as e:
    st.error(f"โ Sklearn import error: {e}")

# Import custom modules with error handling (project-local package; the app
# keeps running in degraded mode if it is absent).
try:
    from NoCodeTextClassifier.EDA import Informations, Visualizations
    from NoCodeTextClassifier.preprocessing import process, TextCleaner, Vectorization
    from NoCodeTextClassifier.models import Models
    st.success("โ NoCodeTextClassifier imported successfully")
except ImportError as e:
    st.error(f"โ NoCodeTextClassifier import error: {e}")
    st.info("Please ensure NoCodeTextClassifier package is installed")
# Set page config (Streamlit requires this before other page elements render).
st.set_page_config(page_title="Debug Text Classification", page_icon="๐", layout="wide")

# Debug section: global toggle read by debug_log() and the summary blocks below.
st.sidebar.header("๐ Debug Information")
debug_mode = st.sidebar.checkbox("Enable Debug Mode", value=True)
def debug_log(message, level="INFO"):
    """Write a timestamped log line to the sidebar when debug mode is enabled."""
    if not debug_mode:
        return
    stamp = datetime.now().strftime("%H:%M:%S")
    st.sidebar.write(f"**{stamp} [{level}]:** {message}")
def detailed_error_info(e):
    """Return a dict describing an exception: its type name, message and traceback."""
    return {
        'type': type(e).__name__,
        'message': str(e),
        'traceback': traceback.format_exc(),
    }
def inspect_uploaded_file(uploaded_file):
    """Inspect uploaded file properties"""
    debug_log("๐ Inspecting uploaded file...")
    try:
        info = {
            'name': uploaded_file.name,
            'type': uploaded_file.type,
            'size': uploaded_file.size,
            'file_id': getattr(uploaded_file, 'file_id', 'Not available'),
        }
        debug_log(f"File name: {info['name']}")
        debug_log(f"File type: {info['type']}")
        debug_log(f"File size: {info['size']} bytes")
        debug_log(f"File ID: {info['file_id']}")

        # Peek at the start of the stream, then rewind so later readers
        # start from byte 0 again.
        uploaded_file.seek(0)
        head = uploaded_file.read(100)
        debug_log(f"First 100 bytes type: {type(head)}")
        debug_log(f"First 100 bytes preview: {head[:50]}...")
        uploaded_file.seek(0)

        return info
    except Exception as exc:
        details = detailed_error_info(exc)
        debug_log(f"โ Error inspecting file: {details['type']}: {details['message']}", "ERROR")
        st.sidebar.error(f"File inspection error: {details['message']}")
        return None
def safe_read_csv_debug(uploaded_file, encoding_options=('utf-8', 'latin1', 'iso-8859-1', 'cp1252')):
    """Safely read CSV with extensive debugging.

    Tries three pandas read strategies crossed with several encodings, logging
    every attempt, and returns the first DataFrame that parses successfully.
    Returns None when every method/encoding combination fails.

    Fixes vs. the original: the mutable-list default argument is now a tuple,
    and the dead lambdas that were built into ``methods`` but never invoked
    (dispatch was always by name string) have been removed.
    """
    debug_log("๐ Starting CSV read process...")

    # Inspect file first; bail out if the upload itself is unreadable.
    file_info = inspect_uploaded_file(uploaded_file)
    if file_info is None:
        return None

    # Dispatch is by name in the branch below; only the names are needed.
    methods = ["Direct pandas read", "BytesIO method", "StringIO method"]

    for method_name in methods:
        debug_log(f"๐ Trying method: {method_name}")
        for encoding in encoding_options:
            try:
                debug_log(f" - Attempting encoding: {encoding}")
                uploaded_file.seek(0)
                if method_name == "Direct pandas read":
                    df = pd.read_csv(uploaded_file, encoding=encoding)
                elif method_name == "BytesIO method":
                    content = uploaded_file.read()
                    df = pd.read_csv(io.BytesIO(content), encoding=encoding)
                else:  # StringIO method
                    content = uploaded_file.read()
                    if isinstance(content, bytes):
                        content = content.decode(encoding)
                    df = pd.read_csv(io.StringIO(content))

                debug_log(f"โ Success with {method_name} + {encoding}")
                debug_log(f"DataFrame shape: {df.shape}")
                debug_log(f"Columns: {list(df.columns)}")
                st.success(f"File loaded successfully using {method_name} with {encoding} encoding")
                return df
            except UnicodeDecodeError as e:
                # Wrong encoding for this attempt — try the next one quietly.
                debug_log(f" - Unicode error with {encoding}: {str(e)}", "WARNING")
                continue
            except Exception as e:
                error_info = detailed_error_info(e)
                debug_log(f" - Error with {method_name} + {encoding}: {error_info['type']}: {error_info['message']}", "ERROR")
                # Surface permission/HTTP-403 failures loudly — these are the
                # class of bug this debug app exists to hunt down.
                if "403" in str(e) or "permission" in str(e).lower():
                    st.error("๐จ PERMISSION ERROR DETECTED!")
                    st.error(f"Method: {method_name}, Encoding: {encoding}")
                    st.error(f"Error type: {error_info['type']}")
                    st.error(f"Error message: {error_info['message']}")
                    st.code(error_info['traceback'])
                continue

    debug_log("โ All reading methods failed", "ERROR")
    st.error("All CSV reading methods failed. Check debug log for details.")
    return None
| # Utility functions with debugging | |
def save_artifacts(obj, folder_name, file_name):
    """Save artifacts with debugging"""
    debug_log(f"๐พ Saving {file_name} to {folder_name}")
    try:
        os.makedirs(folder_name, exist_ok=True)
        target = os.path.join(folder_name, file_name)
        # Pickle the object to <folder_name>/<file_name>.
        with open(target, 'wb') as handle:
            pickle.dump(obj, handle)
        debug_log(f"โ Successfully saved {file_name}")
        return True
    except Exception as exc:
        details = detailed_error_info(exc)
        debug_log(f"โ Error saving {file_name}: {details['message']}", "ERROR")
        st.error(f"Save error: {details['message']}")
        return False
def load_artifacts(folder_name, file_name):
    """Load artifacts with debugging"""
    debug_log(f"๐ Loading {file_name} from {folder_name}")
    try:
        path = os.path.join(folder_name, file_name)
        # Missing file is an expected condition, not an exception: log + None.
        if not os.path.exists(path):
            debug_log(f"โ File not found: {path}", "ERROR")
            return None
        with open(path, 'rb') as handle:
            artifact = pickle.load(handle)
        debug_log(f"โ Successfully loaded {file_name}")
        return artifact
    except Exception as exc:
        details = detailed_error_info(exc)
        debug_log(f"โ Error loading {file_name}: {details['message']}", "ERROR")
        st.error(f"Load error: {details['message']}")
        return None
def load_model(model_name):
    """Load a pickled model from the "models" directory (thin wrapper over load_artifacts)."""
    debug_log(f"๐ค Loading model: {model_name}")
    return load_artifacts("models", model_name)
def predict_text(model_name, text, vectorizer_type="tfidf"):
    """Make prediction with debugging.

    Loads the model, the fitted vectorizer ("artifacts/<type>_vectorizer.pkl")
    and the label encoder ("artifacts/encoder.pkl"), cleans and vectorizes
    `text`, and returns a tuple (predicted_label, prediction_proba).
    `prediction_proba` is None when the model has no usable predict_proba.
    Returns (None, None) if any artifact is missing or any step fails.
    """
    debug_log(f"๐ฎ Starting prediction with {model_name}")
    try:
        # Load components; each loader already reports its own errors.
        model = load_model(model_name)
        if model is None:
            return None, None

        vectorizer_file = f"{vectorizer_type}_vectorizer.pkl"
        vectorizer = load_artifacts("artifacts", vectorizer_file)
        if vectorizer is None:
            return None, None

        encoder = load_artifacts("artifacts", "encoder.pkl")
        if encoder is None:
            return None, None

        debug_log("๐งน Cleaning text...")
        text_cleaner = TextCleaner()
        clean_text = text_cleaner.clean_text(text)
        debug_log(f"Cleaned text preview: {clean_text[:50]}...")

        debug_log("๐ข Vectorizing text...")
        text_vector = vectorizer.transform([clean_text])
        debug_log(f"Vector shape: {text_vector.shape}")

        debug_log("๐ฏ Making prediction...")
        prediction = model.predict(text_vector)

        prediction_proba = None
        if hasattr(model, 'predict_proba'):
            # Was a bare `except:`; narrowed to Exception so that
            # KeyboardInterrupt/SystemExit are not swallowed here.
            try:
                prediction_proba = model.predict_proba(text_vector)[0]
                debug_log(f"Prediction probabilities: {prediction_proba}")
            except Exception:
                debug_log("No prediction probabilities available", "WARNING")

        predicted_label = encoder.inverse_transform(prediction)[0]
        debug_log(f"โ Prediction complete: {predicted_label}")
        return predicted_label, prediction_proba
    except Exception as e:
        error_info = detailed_error_info(e)
        debug_log(f"โ Prediction error: {error_info['message']}", "ERROR")
        st.error(f"Prediction error: {error_info['message']}")
        if debug_mode:
            st.code(error_info['traceback'])
        return None, None
# Main App
st.title('๐ Debug Text Classification App')
st.write('Debug version to identify and fix issues')

# Environment info: library versions, cwd, and a write/delete probe of the
# local filesystem (useful when the hosting sandbox forbids writes).
if debug_mode:
    st.sidebar.subheader("๐ฅ๏ธ Environment Info")
    st.sidebar.write(f"Python version: {sys.version}")
    st.sidebar.write(f"Streamlit version: {st.__version__}")
    st.sidebar.write(f"Pandas version: {pd.__version__}")
    st.sidebar.write(f"Current working directory: {os.getcwd()}")

    # Check directory permissions by creating then removing a scratch file.
    try:
        test_dir = "test_permissions"
        os.makedirs(test_dir, exist_ok=True)
        test_file = os.path.join(test_dir, "test.txt")
        with open(test_file, 'w') as f:
            f.write("test")
        os.remove(test_file)
        os.rmdir(test_dir)
        st.sidebar.success("โ File system permissions OK")
    except Exception as e:
        st.sidebar.error(f"โ File system permission issue: {e}")

# Sidebar navigation
section = st.sidebar.radio("Choose Section", ["File Upload Debug", "Data Analysis", "Train Model", "Predictions"])

# Session state initialization (values persist across Streamlit reruns).
if 'vectorizer_type' not in st.session_state:
    st.session_state.vectorizer_type = "tfidf"
if 'train_df' not in st.session_state:
    st.session_state.train_df = None
# File Upload Debug Section
if section == "File Upload Debug":
    st.subheader("๐ File Upload Debugging")
    st.info("This section helps debug file upload issues. Upload your file and see detailed error information.")

    train_data = st.file_uploader("Upload training data (DEBUG MODE)", type=["csv"], key="debug_upload")

    if train_data is not None:
        st.write("### File Upload Detected!")

        # Show raw file info (attributes may be absent, hence hasattr guards).
        st.write("**Raw File Information:**")
        st.json({
            "name": train_data.name,
            "type": train_data.type if hasattr(train_data, 'type') else "Unknown",
            "size": train_data.size if hasattr(train_data, 'size') else "Unknown"
        })

        # Try to read the file via the multi-method / multi-encoding reader.
        st.write("### Attempting to Read File...")
        with st.spinner("Reading file with debug mode..."):
            df = safe_read_csv_debug(train_data)

        if df is not None:
            st.success("๐ File successfully loaded!")
            st.write("**Data Preview:**")
            st.dataframe(df.head())
            st.write(f"**Shape:** {df.shape}")
            st.write(f"**Columns:** {list(df.columns)}")
            st.write(f"**Data Types:**")
            st.write(df.dtypes)

            # Store in session state so other sections can reuse the data.
            st.session_state.train_df = df
        else:
            st.error("โ Failed to load file. Check the debug log for details.")

            # Additional troubleshooting
            st.write("### ๐ง Troubleshooting Steps:")
            st.write("1. Check if your file is a valid CSV")
            st.write("2. Try saving your CSV with different encoding (UTF-8 recommended)")
            st.write("3. Check if file size is within limits")
            st.write("4. Ensure no special characters in filename")
            st.write("5. Try uploading from a different location")
# Other sections (simplified for debugging)
elif section == "Data Analysis":
    st.subheader("๐ Data Analysis")
    if st.session_state.train_df is not None:
        df = st.session_state.train_df
        st.write("Using loaded data from debug session:")
        st.dataframe(df.head())

        # Basic analysis without custom modules if they fail
        st.write(f"**Shape:** {df.shape}")
        st.write(f"**Columns:** {list(df.columns)}")
        st.write(f"**Missing values:**")
        st.write(df.isnull().sum())
    else:
        st.warning("No data loaded. Please use 'File Upload Debug' section first.")

elif section == "Train Model":
    st.subheader("๐ค Train Model")
    st.info("Use this section after successfully loading data in debug mode.")
    if st.session_state.train_df is not None:
        st.success("Data available for training!")
        # Add your training logic here
    else:
        st.warning("No data loaded. Please use 'File Upload Debug' section first.")

elif section == "Predictions":
    st.subheader("๐ฎ Predictions")
    st.info("Use this section after training a model.")

    # List pickled models in "models/" (the folder save_artifacts would use).
    if os.path.exists("models"):
        models = [f for f in os.listdir("models") if f.endswith('.pkl')]
        if models:
            st.write(f"Available models: {models}")
        else:
            st.info("No trained models found.")
    else:
        st.info("Models directory not found.")
# Debug summary: quick sidebar indicator of whether data has been loaded.
if debug_mode:
    st.sidebar.markdown("---")
    st.sidebar.subheader("๐ Debug Summary")
    if st.session_state.train_df is not None:
        st.sidebar.success("โ Data loaded successfully")
    else:
        st.sidebar.warning("โ ๏ธ No data loaded")