|
|
|
|
|
import streamlit as st |
|
|
import requests |
|
|
from bs4 import BeautifulSoup |
|
|
import re |
|
|
import time |
|
|
import os |
|
|
|
|
|
import streamlit as st |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Banner shown in place of the page when the visitor has not logged in.
_ACCESS_DENIED_HTML = """
<style>
.error-container {
text-align: center;
padding: 3rem;
background: linear-gradient(135deg, #ef4444, #dc2626);
color: white;
border-radius: 10px;
margin: 2rem 0;
}
</style>

<div class="error-container">
<h1>π Access Denied</h1>
<p style="font-size: 1.2rem;">Please login to access this page</p>
</div>
"""

# Button-styled link that points back to the site root ("/").
_LOGIN_LINK_HTML = """
<div style="text-align: center; margin-top: 2rem;">
<a href="/">
<button style="
background-color: #4285F4;
color: white;
padding: 12px 24px;
border-radius: 6px;
border: none;
font-size: 16px;
cursor: pointer;
">
π Go to Login Page
</button>
</a>
</div>
"""

# Gate the page: a missing or falsy ``authenticated`` flag in the session
# renders the access-denied screen and halts the script.
if not st.session_state.get("authenticated"):
    st.set_page_config(page_title="Access Denied", page_icon="π", layout="centered")
    st.markdown(_ACCESS_DENIED_HTML, unsafe_allow_html=True)
    st.markdown(_LOGIN_LINK_HTML, unsafe_allow_html=True)
    st.stop()

# Page chrome for authenticated visitors.
st.set_page_config(page_title="LinkedIn AI Analyzer", page_icon="πΌ", layout="wide")
|
|
|
|
|
def enhanced_chat_analysis(user_input, extracted_data):
    """Build a Markdown reply to a chat question about extracted LinkedIn data.

    The question is lower-cased and matched by substring against keyword
    groups, checked in a fixed order (first match wins): post topic, summary,
    projects, skills, author. Anything else gets a generic reply that echoes
    the question and previews the first content block.

    NOTE(review): most of the reply text is canned (GitHub profile,
    "University Information Chatbot", ...) and is not derived from
    ``extracted_data``; only the first content block, the title, the data
    type and the block count are interpolated. Confirm this is intended for
    arbitrary pages.

    Args:
        user_input: The question typed by the user.
        extracted_data: Mapping describing the extracted page. The keys read
            here are ``content_blocks`` (list of strings), ``page_info``
            (mapping with an optional ``title``) and ``data_type``.

    Returns:
        A Markdown string. Falsy ``extracted_data`` -- or a topic/summary
        question when there are no content blocks -- yields the "no data"
        message. Any exception is reported as an "Analysis error" string
        instead of being raised.
    """
    no_data_reply = "β No LinkedIn data available. Please extract data first using the sidebar."

    try:
        if not extracted_data:
            return no_data_reply

        # ``or`` also covers keys that are present but set to None.
        content_blocks = extracted_data.get('content_blocks') or []
        page_info = extracted_data.get('page_info') or {}
        data_type = extracted_data.get('data_type', 'profile')
        title = page_info.get('title', 'LinkedIn Content')

        question = user_input.lower()

        def mentions(*keywords):
            """Return True when the question contains any of ``keywords``."""
            return any(keyword in question for keyword in keywords)

        if mentions('what is this', "what's this", 'post about', 'content about'):
            # Previously this path fell off the end and returned None when
            # there were no content blocks.
            if not content_blocks:
                return no_data_reply
            return "\n".join([
                "**π Post Analysis:**",
                "",
                "This LinkedIn post is about:",
                "",
                f"**{content_blocks[0]}**",
                "",
                "The author is sharing their GitHub profile and showcasing projects they've been working on, including:",
                "",
                "β’ **University Information Chatbot** - An AI chatbot for university information",
                "β’ **LinkedIn Data Extractor** - A tool for extracting and analyzing LinkedIn data",
                "",
                "This appears to be a professional sharing their technical projects and inviting others to check out their work.",
            ])

        if mentions('summary', 'summarize', 'overview'):
            if not content_blocks:
                return no_data_reply
            # One numbered line per block: its first 20 words, for up to 3 blocks.
            main_points = [
                f"{position}. {' '.join(block.split()[:20])}..."
                for position, block in enumerate(content_blocks[:3], start=1)
            ]
            return "\n".join([
                "**π Summary**",
                "",
                f"**Title:** {title}",
                f"**Type:** {data_type.title()}",
                f"**Content Blocks:** {len(content_blocks)}",
                "",
                "**Key Content:**",
                *main_points,
                "",
                "The post showcases technical projects and professional work.",
            ])

        if mentions('project', 'github', 'repository'):
            return "\n".join([
                "**π οΈ Projects Mentioned:**",
                "",
                "Based on the LinkedIn post, the author is sharing these projects:",
                "",
                "1. **University Information Chatbot** - An AI-powered chatbot for providing university-related information",
                "2. **LinkedIn Data Extractor** - A tool for extracting and analyzing data from LinkedIn profiles",
                "",
                "The author is inviting people to check out their GitHub profile to see these projects.",
            ])

        if mentions('skill', 'technology', 'expertise'):
            return "\n".join([
                "**π» Technical Skills Implied:**",
                "",
                "Based on the projects mentioned, the author likely has skills in:",
                "",
                "β’ Python programming",
                "β’ Web development",
                "β’ AI/Chatbot development",
                "β’ Data extraction/processing",
                "β’ API integration",
                "β’ GitHub repository management",
                "",
                "These skills are typical for building chatbots and data extraction tools.",
            ])

        if mentions('who', 'author', 'person'):
            return "\n".join([
                "**π€ About the Author:**",
                "",
                "Based on the LinkedIn post:",
                "",
                f"**Title:** {title}",
                "",
                "This appears to be a professional developer/engineer who:",
                "- Builds AI chatbots and data extraction tools",
                "- Shares their work on GitHub",
                "- Is active on LinkedIn for professional networking",
                "- Works on projects like University Information systems and LinkedIn data analysis",
            ])

        # Generic fallback: echo the question and preview the first block.
        if content_blocks:
            post_preview = content_blocks[0][:200] + '...'
        else:
            post_preview = 'No content'
        return "\n".join([
            "**π€ Analysis Response:**",
            "",
            "I've analyzed this LinkedIn post for you.",
            "",
            f"**Your question:** \"{user_input}\"",
            "",
            f"**Post Content:** {post_preview}",
            "",
            "This appears to be a post where the author is sharing their GitHub profile and showcasing technical projects they've built.",
            "",
            "**Try asking:**",
            "- \"What projects are mentioned?\"",
            "- \"Tell me about the GitHub profile\"",
            "- \"What is the main purpose of this post?\"",
            "- \"What skills does the author have?\"",
        ])

    except Exception as e:
        # Chat boundary: surface the failure as a chat message, never raise.
        return f"β Analysis error: {e!s}"
|
|
|
|
|
def extract_linkedin_data(url, data_type):
    """Download a page and break its visible text into content blocks.

    Args:
        url: Address of the page to fetch.
        data_type: Label stored unchanged in the result under ``data_type``.

    Returns:
        On success, a dict with ``page_info`` (title, url, response_code,
        content_length), ``content_blocks``, ``extraction_time``,
        ``data_type`` and ``status`` set to ``"success"``. For a non-200
        response, when no text fragment longer than 30 characters is found,
        or when any exception is raised, a dict with ``error`` and ``status``
        set to ``"error"``.
    """
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    }

    try:
        st.info(f"π Accessing: {url}")
        response = requests.get(url, headers=browser_headers, timeout=25)

        status_code = response.status_code
        if status_code != 200:
            return {
                "error": f"Failed to access page (Status: {status_code})",
                "status": "error",
            }

        soup = BeautifulSoup(response.text, 'html.parser')

        # Drop elements whose text should not appear in the extracted content.
        for tag in soup.find_all(["script", "style", "meta", "link", "nav", "header", "footer"]):
            tag.decompose()

        # Flatten the page text: trim every line, split it on spaces and keep
        # only the non-empty pieces, then re-join with single spaces.
        pieces = []
        for raw_line in soup.get_text().splitlines():
            for piece in raw_line.strip().split(" "):
                piece = piece.strip()
                if piece:
                    pieces.append(piece)
        clean_text = ' '.join(pieces)

        # Every '.'-separated fragment longer than 30 characters is one block.
        paragraphs = []
        for fragment in clean_text.split('.'):
            fragment = fragment.strip()
            if len(fragment) > 30:
                paragraphs.append(fragment)

        if not paragraphs:
            return {
                "error": "No meaningful content found. The page might require login or have restricted access.",
                "status": "error",
            }

        title_tag = soup.find('title')
        if title_tag:
            page_title = title_tag.text.strip()
        else:
            page_title = "LinkedIn Page"

        page_info = {
            "title": page_title,
            "url": url,
            "response_code": status_code,
            "content_length": len(clean_text),
        }
        return {
            "page_info": page_info,
            "content_blocks": paragraphs,
            "extraction_time": time.strftime('%Y-%m-%d %H:%M:%S'),
            "data_type": data_type,
            "status": "success",
        }

    except Exception as e:
        # Network, parsing and any other failure all come back as an error dict.
        return {"error": f"Extraction error: {e!s}", "status": "error"}
|
|
|
|
|
def display_metrics(extracted_data):
    """Render four summary metrics for an extraction result in one row.

    Shows the number of content blocks, the total word count across all
    blocks, the cleaned text length and the HTTP response code. Does nothing
    when ``extracted_data`` is falsy.

    Args:
        extracted_data: Extraction result; ``page_info`` (with
            ``content_length`` and ``response_code``) and ``content_blocks``
            are read from it.
    """
    if not extracted_data:
        return

    info = extracted_data['page_info']
    blocks = extracted_data['content_blocks']

    blocks_col, words_col, chars_col, code_col = st.columns(4)

    blocks_col.metric("Content Blocks", len(blocks))

    word_count = 0
    for block in blocks:
        word_count += len(block.split())
    words_col.metric("Total Words", word_count)

    # Thousands separators keep large character counts readable.
    chars_col.metric("Characters", f"{info['content_length']:,}")

    code_col.metric("Response Code", info['response_code'])
|
|
|
|
|
def _init_session_state():
    """Seed every ``st.session_state`` key the page reads, if not yet present."""
    defaults = {
        "extracted_data": None,
        "chat_history": [],
        "processing": False,
        "current_url": "",
        "last_user_input": "",
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _run_extraction(url_to_use, data_type):
    """Validate the URL, run the extraction and store the result in session state."""
    if not url_to_use:
        st.warning("β οΈ Please enter a LinkedIn URL")
        return
    if not url_to_use.startswith('https://www.linkedin.com/'):
        st.error("β Please enter a valid LinkedIn URL")
        return

    st.session_state.processing = True
    try:
        with st.spinner("π Extracting LinkedIn data..."):
            extracted_data = extract_linkedin_data(url_to_use, data_type)

            if extracted_data.get("status") == "success":
                st.session_state.extracted_data = extracted_data
                st.session_state.current_url = url_to_use
                # A new page invalidates the previous conversation.
                st.session_state.chat_history = []
                st.session_state.last_user_input = ""
                st.success("✅ Data extracted successfully!")
                st.balloons()
            else:
                error_msg = extracted_data.get("error", "Unknown error")
                st.error(f"β Extraction failed: {error_msg}")
    finally:
        # Always clear the flag so a failure cannot leave the page stuck on
        # the "Processing" notice.
        st.session_state.processing = False


def _render_sidebar():
    """Draw the sidebar: content type, URL entry, quick tests, extraction, chat reset."""
    with st.sidebar:
        st.markdown("### βοΈ Configuration")

        data_type = st.selectbox("π Content Type", ["profile", "company", "post"])

        url_placeholder = {
            "profile": "https://www.linkedin.com/in/username/",
            "company": "https://www.linkedin.com/company/companyname/",
            "post": "https://www.linkedin.com/posts/username_postid/",
        }
        linkedin_url = st.text_input(
            "π LinkedIn URL",
            placeholder=url_placeholder[data_type],
            help="Enter a public LinkedIn URL",
        )

        st.markdown("### π Quick Test")
        test_urls = {
            "Microsoft": "https://www.linkedin.com/company/microsoft/",
            "Google": "https://www.linkedin.com/company/google/",
            "Apple": "https://www.linkedin.com/company/apple/",
        }
        for name, url in test_urls.items():
            if st.button(f"π’ {name}", key=name, use_container_width=True):
                # Remembered as the fallback URL when the text box is empty.
                st.session_state.current_url = url
                st.rerun()

        if st.button("π Extract & Analyze", type="primary", use_container_width=True):
            _run_extraction(linkedin_url.strip() or st.session_state.current_url, data_type)

        # NOTE(review): placed in the sidebar by judgment -- confirm against
        # the intended layout.
        if st.session_state.extracted_data:
            st.markdown("---")
            st.subheader("π¬ Chat Management")
            if st.button("ποΈ Clear Chat", type="secondary", use_container_width=True):
                st.session_state.chat_history = []
                st.session_state.last_user_input = ""
                st.success("ποΈ Chat history cleared!")


def _render_results():
    """Draw the extraction results, or the welcome text when nothing is extracted."""
    st.markdown("### π Extraction Results")

    if st.session_state.processing:
        st.info("π Processing LinkedIn data...")
        return

    data = st.session_state.extracted_data
    if not data:
        st.info("\n".join([
            "",
            "π **Welcome to LinkedIn AI Analyzer!**",
            "",
            "**To get started:**",
            "1. Select content type in sidebar",
            "2. Enter a LinkedIn URL or click suggested company",
            "3. Click \"Extract & Analyze\"",
            "4. Chat with the AI below about the extracted content",
            "",
            "**Supported URLs:**",
            "- π€ Public Profiles",
            "- π’ Company Pages",
            "- π Public Posts",
            "",
        ]))
        return

    page_info = data['page_info']
    content_blocks = data['content_blocks']

    st.success("✅ Extraction Complete")
    display_metrics(data)

    info_col, sample_col = st.columns(2)

    with info_col:
        st.markdown("#### π·οΈ Page Information")
        st.write(f"**Title:** {page_info['title']}")
        st.write(f"**URL:** {page_info['url']}")
        st.write(f"**Type:** {data['data_type'].title()}")
        st.write(f"**Content Blocks:** {len(content_blocks)}")
        st.write(f"**Extracted:** {data['extraction_time']}")

    with sample_col:
        st.markdown("#### π Sample Content")
        # Only the first three blocks are expanded; the rest are counted.
        for i, block in enumerate(content_blocks[:3]):
            with st.expander(f"Block {i+1} ({len(block.split())} words)"):
                st.write(block)

        if len(content_blocks) > 3:
            st.info(f"π +{len(content_blocks) - 3} more blocks")


def _render_chat():
    """Draw the chat section and answer a newly submitted question."""
    st.markdown("---")
    st.markdown("### π¬ Chat with AI")

    data = st.session_state.extracted_data
    if not (data and data.get("status") == "success"):
        # No chat box without data, so history and suggestions are hidden too.
        return

    st.success("π¬ Chat ready! Ask questions about the LinkedIn data below.")

    for chat in st.session_state.chat_history:
        if chat["role"] in ("user", "assistant"):
            with st.chat_message(chat["role"]):
                st.write(chat['content'])

    if not st.session_state.chat_history:
        st.markdown("#### π‘ Try asking:")
        suggestions = [
            "What is this post about?",
            "Summarize this content",
            "What projects are mentioned?",
            "Tell me about the GitHub profile",
        ]
        for i, (col, suggestion) in enumerate(zip(st.columns(len(suggestions)), suggestions)):
            with col:
                if st.button(suggestion, key=f"sugg_{i}", use_container_width=True):
                    st.info(f"π‘ Type: '{suggestion}' in the chat below")

    user_input = st.chat_input("Type your question about the LinkedIn data here...")

    # ``st.chat_input`` only returns text on the run triggered by a submit,
    # so no duplicate check is needed; comparing with the previous input
    # silently dropped a question asked twice in a row.
    if user_input:
        st.session_state.last_user_input = user_input
        st.session_state.chat_history.append({"role": "user", "content": user_input})

        with st.spinner("π€ Analyzing..."):
            response = enhanced_chat_analysis(user_input, data)
            st.session_state.chat_history.append({"role": "assistant", "content": response})

        # Rerun so the new messages are drawn by the history loop above.
        st.rerun()


def _render_features():
    """Draw the three-column feature overview at the bottom of the page."""
    feature_texts = [
        "\n".join([
            "",
            "**π Data Extraction**",
            "- LinkedIn content scraping",
            "- Text processing",
            "- Content analysis",
            "",
        ]),
        "\n".join([
            "",
            "**π¬ Smart Chat**",
            "- Interactive Q&A",
            "- Content analysis",
            "- Professional insights",
            "",
        ]),
        "\n".join([
            "",
            "**π Insights**",
            "- Summary generation",
            "- Skill detection",
            "- Experience analysis",
            "",
        ]),
    ]
    for col, text in zip(st.columns(3), feature_texts):
        with col:
            st.markdown(text)


def main():
    """Render the LinkedIn AI Analyzer page.

    Draws, in order: the title, the sidebar (configuration, quick-test
    buttons, extraction trigger, chat reset), the extraction results or a
    welcome message, the chat section, and a static feature overview. All
    state that must survive reruns is kept in ``st.session_state``.
    """
    st.title("πΌ LinkedIn AI Analyzer")

    _init_session_state()
    _render_sidebar()
    _render_results()
    _render_chat()

    st.markdown("---")
    st.markdown("### π Features")
    _render_features()
|
|
|
|
|
# Run the page only when this file is executed as the main script.
if __name__ == "__main__":
    main()