# IntelliCV — Streamlit resume analyzer (HuggingFace Spaces app)
| import streamlit as st | |
| import pickle | |
| import docx | |
| import PyPDF2 | |
| import re | |
| from huggingface_hub import hf_hub_download | |
| from io import BytesIO | |
| # Set page configuration | |
| st.set_page_config( | |
| page_title="IntelliCV: AI Resume Analyzer", | |
| page_icon="π", | |
| layout="centered", | |
| initial_sidebar_state="expanded" | |
| ) | |
| # Add some basic CSS styling directly in the app | |
| st.markdown(""" | |
| <style> | |
| .result-card { | |
| padding: 20px; | |
| border-radius: 10px; | |
| background-color: #f0f2f6; | |
| margin-bottom: 20px; | |
| } | |
| .category { | |
| font-size: 24px; | |
| font-weight: bold; | |
| color: #2e86c1; | |
| } | |
| .stSpinner > div { | |
| text-align: center; | |
| align-items: center; | |
| } | |
| </style> | |
| """, unsafe_allow_html=True) | |
| # Cache model loading with improved error handling | |
| def load_models(): | |
| try: | |
| repo_id = "psychomita/intellicv-models" | |
| clf_path = hf_hub_download(repo_id=repo_id, filename="clf.pkl", repo_type="model") | |
| tfidf_path = hf_hub_download(repo_id=repo_id, filename="tfidf.pkl", repo_type="model") | |
| encoder_path = hf_hub_download(repo_id=repo_id, filename="encoder.pkl", repo_type="model") | |
| svc_model = pickle.load(open(clf_path, 'rb')) | |
| tfidf = pickle.load(open(tfidf_path, 'rb')) | |
| le = pickle.load(open(encoder_path, 'rb')) | |
| return svc_model, tfidf, le | |
| except Exception as e: | |
| st.error(f"Failed to load models: {str(e)}") | |
| return None, None, None | |
| # Load models | |
| svc_model, tfidf, le = load_models() | |
| def cleanResume(txt): | |
| """Improved text cleaning function with more comprehensive patterns""" | |
| if not isinstance(txt, str): | |
| return "" | |
| clean_patterns = [ | |
| (r'http\S+|www\S+|https\S+', ' '), # URLs | |
| (r'\bRT\b|\bretweet\b', ' '), # Retweets | |
| (r'#\S+', ' '), # Hashtags | |
| (r'@\S+', ' '), # Mentions | |
| (r'[%s]' % re.escape("""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' '), # Punctuation | |
| (r'[^\x00-\x7f]', ' '), # Non-ASCII chars | |
| (r'\s+', ' '), # Extra whitespace | |
| (r'\b\d+\b', ' '), # Standalone numbers | |
| (r'\b[\w\.-]+@[\w\.-]+\.\w+\b', ' ') # Email addresses | |
| ] | |
| cleanText = txt | |
| for pattern, repl in clean_patterns: | |
| cleanText = re.sub(pattern, repl, cleanText) | |
| return cleanText.strip() | |
| def extract_text_from_pdf(file): | |
| """Improved PDF text extraction with error handling""" | |
| try: | |
| pdf_reader = PyPDF2.PdfReader(BytesIO(file.read())) | |
| text = [] | |
| for page in pdf_reader.pages: | |
| try: | |
| page_text = page.extract_text() or '' | |
| text.append(page_text) | |
| except Exception as e: | |
| st.warning(f"Could not extract text from one page: {str(e)}") | |
| continue | |
| return ' '.join(text) | |
| except Exception as e: | |
| raise ValueError(f"Failed to read PDF file: {str(e)}") | |
| def extract_text_from_docx(file): | |
| """Improved DOCX text extraction with error handling""" | |
| try: | |
| doc = docx.Document(BytesIO(file.read())) | |
| return '\n'.join(paragraph.text for paragraph in doc.paragraphs if paragraph.text.strip()) | |
| except Exception as e: | |
| raise ValueError(f"Failed to read DOCX file: {str(e)}") | |
| def extract_text_from_txt(file): | |
| """Improved text file extraction with multiple encoding attempts""" | |
| encodings = ['utf-8', 'latin-1', 'iso-8859-1', 'windows-1252'] | |
| for encoding in encodings: | |
| try: | |
| return BytesIO(file.read()).read().decode(encoding) | |
| except UnicodeDecodeError: | |
| file.seek(0) | |
| raise ValueError("Failed to decode text file with common encodings") | |
| def handle_file_upload(uploaded_file): | |
| """Handle file upload with better type checking""" | |
| if not uploaded_file: | |
| return None | |
| file_extension = uploaded_file.name.split('.')[-1].lower() | |
| try: | |
| if file_extension == 'pdf': | |
| return extract_text_from_pdf(uploaded_file) | |
| elif file_extension == 'docx': | |
| return extract_text_from_docx(uploaded_file) | |
| elif file_extension == 'txt': | |
| return extract_text_from_txt(uploaded_file) | |
| else: | |
| raise ValueError("Unsupported file type. Please upload PDF, DOCX, or TXT.") | |
| except Exception as e: | |
| raise ValueError(f"Error processing file: {str(e)}") | |
| def predict_category(input_resume): | |
| """Make prediction with input validation""" | |
| if not input_resume or not isinstance(input_resume, str): | |
| return "Unknown" | |
| try: | |
| cleaned_text = cleanResume(input_resume) | |
| if not cleaned_text.strip(): | |
| return "Unknown (insufficient text)" | |
| vectorized_text = tfidf.transform([cleaned_text]).toarray() | |
| predicted_category = svc_model.predict(vectorized_text) | |
| return le.inverse_transform(predicted_category)[0] | |
| except Exception as e: | |
| st.error(f"Prediction error: {str(e)}") | |
| return "Unknown (prediction failed)" | |
| def display_results(resume_text, category): | |
| """Display results in a more engaging way""" | |
| st.subheader("Analysis Results") | |
| # Category display with emoji | |
| category_emojis = { | |
| "Data Science": "π", | |
| "HR": "π₯", | |
| "Design": "π¨", | |
| "Information Technology": "π»", | |
| "Education": "π", | |
| "Business Development": "π", | |
| "Marketing": "π’", | |
| "Sales": "π°", | |
| "Health and Fitness": "πͺ", | |
| "Engineering": "βοΈ" | |
| } | |
| emoji = category_emojis.get(category, "π") | |
| st.markdown(f""" | |
| <div class="result-card"> | |
| <h3>Predicted Job Category</h3> | |
| <p class="category">{emoji} {category}</p> | |
| </div> | |
| """, unsafe_allow_html=True) | |
| # Text analysis section | |
| with st.expander("Text Analysis Details"): | |
| st.markdown("**Cleaned Text Excerpt:**") | |
| cleaned_text = cleanResume(resume_text) | |
| st.text(cleaned_text[:500] + "..." if len(cleaned_text) > 500 else cleaned_text) | |
| st.markdown("**Statistics:**") | |
| col1, col2, col3 = st.columns(3) | |
| col1.metric("Original Length", f"{len(resume_text):,} chars") | |
| col2.metric("Cleaned Length", f"{len(cleaned_text):,} chars") | |
| col3.metric("Reduction", f"{100 - (len(cleaned_text)/len(resume_text)*100 if resume_text else 0):.1f}%") | |
| def main(): | |
| """Main application function with improved UI""" | |
| st.title("π IntelliCV: AI-Powered Resume Analyzer") | |
| st.markdown(""" | |
| Upload your resume and discover which job category it best matches with our AI analysis. | |
| Supported formats: PDF, DOCX, and TXT. | |
| """) | |
| # Sidebar with additional info | |
| with st.sidebar: | |
| st.header("About") | |
| st.markdown(""" | |
| IntelliCV uses machine learning to analyze resume content and predict the most suitable job category. | |
| **How it works:** | |
| 1. Upload your resume | |
| 2. AI extracts and cleans the text | |
| 3. Our model predicts the job category | |
| 4. View detailed analysis | |
| """) | |
| st.markdown("---") | |
| st.markdown(""" | |
| **Common Categories:** | |
| - Data Science | |
| - HR | |
| - Design | |
| - Information Technology | |
| - Education | |
| - And more... | |
| """) | |
| # File upload section | |
| uploaded_file = st.file_uploader( | |
| "Upload your resume", | |
| type=["pdf", "docx", "txt"], | |
| help="Supported formats: PDF, DOCX, TXT (max 10MB)" | |
| ) | |
| if uploaded_file is not None: | |
| # Check file size | |
| if uploaded_file.size > 10 * 1024 * 1024: # 10MB limit | |
| st.error("File size exceeds 10MB limit. Please upload a smaller file.") | |
| return | |
| with st.spinner("π Analyzing your resume..."): | |
| try: | |
| resume_text = handle_file_upload(uploaded_file) | |
| if not resume_text.strip(): | |
| st.warning("The uploaded file appears to be empty or couldn't be read properly.") | |
| return | |
| category = predict_category(resume_text) | |
| display_results(resume_text, category) | |
| # Download button for cleaned text | |
| st.download_button( | |
| label="Download Cleaned Text", | |
| data=resume_text, | |
| file_name="cleaned_resume.txt", | |
| mime="text/plain" | |
| ) | |
| except Exception as e: | |
| st.error(f"An error occurred: {str(e)}") | |
| st.info("Please try another file or check the format.") | |
| if __name__ == "__main__": | |
| if svc_model and tfidf and le: | |
| main() | |
| else: | |
| st.error("Failed to load required models. Please try again later.") |