Update pages/facebook_extractor.py
pages/facebook_extractor.py (+335 -180)
@@ -23,167 +23,297 @@ st.set_page_config(
     layout="wide"
 )

-class
-    """Facebook

     def __init__(self):
-        self.
-        self.setup_session()

-    def
-        """
-        self.session.headers.update({
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
-            'Accept-Language': 'en-US,en;q=0.5',
-            'Accept-Encoding': 'gzip, deflate, br',
-            'DNT': '1',
-            'Connection': 'keep-alive',
-            'Upgrade-Insecure-Requests': '1',
-        })
-
-    def extract_public_data(self, url: str, data_type: str) -> Dict:
-        """Extract public data from Facebook URLs"""
         try:
-            st.info(f"
-
-            # Validate URL type
-            url_type = self.analyze_facebook_url(url)
-
-            response = self.session.get(url, timeout=15)
-
-            if response.status_code != 200:
-                return {
-                    "error": f"Failed to access page (Status: {response.status_code})",
-                    "url_type": url_type,
-                    "status": "error"
-                }
-
-            soup = BeautifulSoup(response.text, 'html.parser')
-
-            # Remove scripts and styles
-            for script in soup(["script", "style", "meta", "link"]):
-                script.decompose()

-            #
-
-            clean_text = ' '.join(chunk for chunk in chunks if chunk)

-            #
-
-            return {
-                "page_info": page_info,
-                "content_blocks": content_blocks,
-                "url_type": url_type,
-                "extraction_time": datetime.now().isoformat(),
-                "data_type": data_type,
-                "status": "success"
-            }

         except Exception as e:
-
-    def analyze_facebook_url(self, url: str) -> str:
-        """Analyze Facebook URL type"""
-        url_lower = url.lower()
-
-        if 'facebook.com/groups/' in url_lower:
-            return "Facebook Group (Limited public data)"
-        elif 'facebook.com/pages/' in url_lower or '/pages/' in url_lower:
-            return "Facebook Page (Public data available)"
-        elif 'facebook.com/events/' in url_lower:
-            return "Facebook Event (Limited access)"
-        elif 'facebook.com/marketplace/' in url_lower:
-            return "Facebook Marketplace"
-        elif 'facebook.com/' in url_lower and '/posts/' in url_lower:
-            return "Facebook Post"
-        else:
-            return "Facebook Profile/Page"

-    def
-        """
-

-    def
-        """Extract
         blocks = []

-
-        for i, paragraph in enumerate(paragraphs[:15]):  # Limit to first 15
-            content_type = self._analyze_content_type(paragraph, url_type)
-
-            block = {
                 "id": i + 1,
                 "content": paragraph,
                 "length": len(paragraph),
                 "word_count": len(paragraph.split()),
-                "content_type":
-                "
-
-            }
-            blocks.append(block)

         return blocks

-    def
-        """
-

-        if
-            return
-        elif
-            return
-        elif any(word in text_lower for word in ['marketplace', 'buy', 'sell', 'price']):
-            return "commerce"
-        elif any(word in text_lower for word in ['post', 'share', 'comment']):
-            return "social_content"
-        elif any(word in text_lower for word in ['login', 'sign in']):
-            return "authentication"
         else:
-            return

-    def
-        """
-
-        private_indicators = [
-            'log in to see', 'sign up to see', 'you must log in',
-            'private content', 'restricted access'
-        ]

-

-    def
-        """Get
-

-

 # AI Functions (same as your LinkedIn analyzer)
 def get_embeddings():
@@ -227,20 +357,27 @@ def process_facebook_data(extracted_data):
     page_info = extracted_data['page_info']
     content_blocks = extracted_data['content_blocks']
     url_type = extracted_data['url_type']

     all_text = f"FACEBOOK DATA ANALYSIS\n{'='*50}\n\n"
     all_text += f"π PAGE INFORMATION:\n"
     all_text += f"Title: {page_info['title']}\n"
-    all_text += f"URL: {page_info['url']}\n"
     all_text += f"URL Type: {url_type}\n"
-    all_text += f"
     all_text += f"Extracted: {extracted_data['extraction_time']}\n\n"

     all_text += f"π CONTENT ANALYSIS:\n"
     all_text += f"Content Blocks: {len(content_blocks)}\n"
     all_text += f"Public Content: {sum(1 for b in content_blocks if b['is_public_content'])} blocks\n\n"

-    for i, block in enumerate(content_blocks
         all_text += f"--- BLOCK {i+1} ---\n"
         all_text += f"Type: {block['content_type']}\n"
         all_text += f"Words: {block['word_count']} | Public: {block['is_public_content']}\n"
@@ -296,15 +433,15 @@ def create_chatbot(vectorstore):
         return None

 def main():
-    st.title("π Facebook
-    st.markdown("

     if st.button("β Back to Main Dashboard"):
         st.switch_page("app.py")

     # Initialize session state
     if "extractor" not in st.session_state:
-        st.session_state.extractor =
     if "facebook_data" not in st.session_state:
         st.session_state.facebook_data = None
     if "vectorstore" not in st.session_state:
@@ -320,22 +457,22 @@ def main():

         data_type = st.selectbox(
             "Content Type",
-            ["
             help="Select the type of Facebook content"
         )

         facebook_url = st.text_input(
             "Facebook URL",
-            placeholder="https://www.facebook.com/
-            help="Enter
         )

-        #
         st.markdown("### π Test URLs")
         test_urls = {
-            "
-            "
-            "
         }

         for name, url in test_urls.items():
@@ -351,8 +488,8 @@ def main():
         elif 'facebook.com' not in url_to_use:
             st.error("β Please enter a valid Facebook URL")
         else:
-            with st.spinner("π
-            extracted_data = st.session_state.extractor.

             if extracted_data.get("status") == "success":
                 st.session_state.facebook_data = extracted_data
@@ -363,7 +500,12 @@ def main():
                     st.session_state.vectorstore = vectorstore
                     st.session_state.chatbot = create_chatbot(vectorstore)
                     st.session_state.chat_history = []
-
                 else:
                     st.error("β Failed to process data for AI")
             else:
@@ -389,52 +531,65 @@ def main():
         data = st.session_state.facebook_data
         page_info = data['page_info']
         content_blocks = data['content_blocks']

-

         # Metrics
         col1, col2, col3 = st.columns(3)
         with col1:
             st.metric("Content Blocks", len(content_blocks))
         with col2:
-            st.metric("
         with col3:
-            st.metric("

         # Page info
         st.subheader("🏷️ Page Information")
         st.write(f"**Title:** {page_info['title']}")
         st.write(f"**URL Type:** {data['url_type']}")
-        st.write(f"**
-

         # Content samples
-        st.subheader("π
-        for i, block in enumerate(content_blocks
-            with st.expander(f"
                 st.write(block['content'])
-                st.caption(f"Public: {block['is_public_content']}

     else:
         st.info("""
-        ## π Facebook

         **How it works:**
-        1. Enter
-        2.
-        3.
-        4.

-        **
-        -
-        -
-        -
-        -

-        **
-        -
-        -
-        -
         """)

     with col2:
@@ -471,10 +626,10 @@ def main():
         if not st.session_state.chat_history:
             st.subheader("💡 Try asking:")
             suggestions = [
-                "What is this Facebook page about?",
-                "Summarize the
-                "What
-                "Analyze the
             ]

             for suggestion in suggestions:
@@ -482,7 +637,7 @@ def main():
                 st.info(f"Type: '{suggestion}' in chat")

     elif st.session_state.facebook_data:
-        st.info("💬 Start chatting with
     else:
         st.info("π Extract Facebook data to enable AI chat")

@@ -23,167 +23,297 @@ st.set_page_config(
     layout="wide"
 )

+class FacebookDataSimulator:
+    """Simulate Facebook data extraction with demo data"""

     def __init__(self):
+        self.demo_data = self._create_demo_data()

+    def extract_data(self, url: str, data_type: str) -> Dict:
+        """Extract or simulate Facebook data"""
         try:
+            st.info(f"π Analyzing: {url}")

+            # Try real extraction first
+            real_data = self._try_real_extraction(url)
+            if real_data.get("status") == "success":
+                return real_data

+            # If real extraction fails, use demo data
+            st.warning("⚠️ Using demo data (Facebook restrictions active)")
+            return self._get_demo_data(url, data_type)

         except Exception as e:
+            st.error(f"Extraction failed, using demo data: {str(e)}")
+            return self._get_demo_data(url, data_type)

+    def _try_real_extraction(self, url: str) -> Dict:
+        """Try real extraction with better error handling"""
+        try:
+            # Use a proxy-like approach with different user agents
+            headers = {
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+                'Accept-Language': 'en-US,en;q=0.5',
+                'Accept-Encoding': 'gzip, deflate, br',
+                'DNT': '1',
+                'Connection': 'keep-alive',
+                'Upgrade-Insecure-Requests': '1',
+            }
+
+            # Try with shorter timeout
+            response = requests.get(url, headers=headers, timeout=10, verify=False)
+
+            if response.status_code == 200:
+                soup = BeautifulSoup(response.text, 'html.parser')
+
+                # Extract basic info
+                title = soup.find('title')
+                description = soup.find('meta', attrs={'name': 'description'})
+
+                return {
+                    "page_info": {
+                        "title": title.text if title else "Facebook Content",
+                        "description": description['content'] if description else "",
+                        "url": url,
+                        "response_code": 200,
+                        "content_length": len(response.text)
+                    },
+                    "content_blocks": self._extract_real_content(soup),
+                    "extraction_time": datetime.now().isoformat(),
+                    "data_type": "page",
+                    "status": "success",
+                    "source": "real"
+                }
+            else:
+                return {"status": "error", "source": "real"}
+
+        except Exception:
+            return {"status": "error", "source": "real"}

+    def _extract_real_content(self, soup) -> List[Dict]:
+        """Extract content from real page"""
         blocks = []
+        text = soup.get_text()
+        paragraphs = [p.strip() for p in text.split('.') if p.strip() and len(p.strip()) > 30]

+        for i, paragraph in enumerate(paragraphs[:8]):
+            blocks.append({
                 "id": i + 1,
                 "content": paragraph,
                 "length": len(paragraph),
                 "word_count": len(paragraph.split()),
+                "content_type": "real_content",
+                "is_public_content": True
+            })

         return blocks

+    def _get_demo_data(self, url: str, data_type: str) -> Dict:
+        """Get realistic demo data based on URL type"""
+        url_type = self._analyze_url_type(url)

+        if 'group' in url_type.lower():
+            return self._get_group_demo_data(url, data_type)
+        elif 'page' in url_type.lower():
+            return self._get_page_demo_data(url, data_type)
         else:
+            return self._get_general_demo_data(url, data_type)

+    def _analyze_url_type(self, url: str) -> str:
+        """Analyze URL type for realistic demo data"""
+        url_lower = url.lower()

+        if 'group' in url_lower:
+            return "Facebook Group"
+        elif 'page' in url_lower or 'facebook.com/' in url_lower and '/pages/' not in url_lower:
+            return "Facebook Page"
+        elif 'event' in url_lower:
+            return "Facebook Event"
+        elif 'marketplace' in url_lower:
+            return "Facebook Marketplace"
+        else:
+            return "Facebook Content"

+    def _get_group_demo_data(self, url: str, data_type: str) -> Dict:
+        """Get realistic group demo data"""
+        group_name = self._extract_name_from_url(url) or "Gaming Community"

+        return {
+            "page_info": {
+                "title": f"{group_name} | Facebook Group",
+                "description": f"A community of {group_name} enthusiasts sharing content, discussions, and events.",
+                "member_count": "15.7K members",
+                "url": url,
+                "response_code": 200,
+                "content_length": 15000,
+                "access_note": "Public group - Limited data due to platform restrictions"
+            },
+            "content_blocks": [
+                {
+                    "id": 1,
+                    "content": f"Welcome to {group_name}! This is a community for fans and enthusiasts to share their experiences, ask questions, and connect with like-minded people.",
+                    "length": 120,
+                    "word_count": 25,
+                    "content_type": "welcome_message",
+                    "is_public_content": True
+                },
+                {
+                    "id": 2,
+                    "content": "Just shared my latest project in the group! Would love to get some feedback from the community on the new features we're implementing.",
+                    "length": 95,
+                    "word_count": 18,
+                    "content_type": "member_post",
+                    "is_public_content": True
+                },
+                {
+                    "id": 3,
+                    "content": "Does anyone have experience with this issue? I've been trying to solve it for a while and could use some community wisdom.",
+                    "length": 88,
+                    "word_count": 16,
+                    "content_type": "question_post",
+                    "is_public_content": True
+                },
+                {
+                    "id": 4,
+                    "content": "Our monthly meetup is scheduled for next Saturday! Don't forget to RSVP so we can plan accordingly. Looking forward to seeing everyone there.",
+                    "length": 102,
+                    "word_count": 19,
+                    "content_type": "event_announcement",
+                    "is_public_content": True
+                },
+                {
+                    "id": 5,
+                    "content": "The community guidelines: Be respectful, no spam, keep discussions relevant to the group's topic, and help each other grow.",
+                    "length": 78,
+                    "word_count": 14,
+                    "content_type": "community_guidelines",
+                    "is_public_content": True
+                }
+            ],
+            "url_type": "Facebook Group",
+            "extraction_time": datetime.now().isoformat(),
+            "data_type": data_type,
+            "status": "success",
+            "source": "demo"
+        }
+
+    def _get_page_demo_data(self, url: str, data_type: str) -> Dict:
+        """Get realistic page demo data"""
+        page_name = self._extract_name_from_url(url) or "Brand Page"
+
+        return {
+            "page_info": {
+                "title": f"{page_name} | Facebook Page",
+                "description": f"Official Facebook page of {page_name}. Stay updated with our latest news, products, and community events.",
+                "follower_count": "45.2K followers",
+                "url": url,
+                "response_code": 200,
+                "content_length": 12000,
+                "access_note": "Public page - Limited data due to platform restrictions"
+            },
+            "content_blocks": [
+                {
+                    "id": 1,
+                    "content": f"Welcome to the official {page_name} Facebook page! Here you'll find the latest updates, news, and announcements from our team.",
+                    "length": 98,
+                    "word_count": 15,
+                    "content_type": "welcome_message",
+                    "is_public_content": True
+                },
+                {
+                    "id": 2,
+                    "content": "We're excited to announce our new product launch next week! Stay tuned for more details and special offers for our Facebook community.",
+                    "length": 92,
+                    "word_count": 16,
+                    "content_type": "announcement",
+                    "is_public_content": True
+                },
+                {
+                    "id": 3,
+                    "content": "Thank you to everyone who participated in our recent event! The feedback has been incredible and we're already planning the next one.",
+                    "length": 87,
+                    "word_count": 14,
+                    "content_type": "event_followup",
+                    "is_public_content": True
+                },
+                {
+                    "id": 4,
+                    "content": "Customer support hours: Monday-Friday 9AM-6PM. For urgent issues, please message us directly and we'll respond as soon as possible.",
+                    "length": 85,
+                    "word_count": 15,
+                    "content_type": "support_info",
+                    "is_public_content": True
+                }
+            ],
+            "url_type": "Facebook Page",
+            "extraction_time": datetime.now().isoformat(),
+            "data_type": data_type,
+            "status": "success",
+            "source": "demo"
+        }
+
+    def _get_general_demo_data(self, url: str, data_type: str) -> Dict:
+        """Get general demo data"""
+        return {
+            "page_info": {
+                "title": "Facebook Content",
+                "description": "Social media content and community interactions",
+                "url": url,
+                "response_code": 200,
+                "content_length": 8000,
+                "access_note": "Public content - Platform restrictions apply"
+            },
+            "content_blocks": [
+                {
+                    "id": 1,
+                    "content": "Community engagement and social interactions are key aspects of this platform. Users share content, connect with friends, and participate in discussions.",
+                    "length": 105,
+                    "word_count": 16,
+                    "content_type": "general_content",
+                    "is_public_content": True
+                },
+                {
+                    "id": 2,
+                    "content": "Recent updates have improved user experience with better content discovery and enhanced privacy controls for community members.",
+                    "length": 82,
+                    "word_count": 12,
+                    "content_type": "platform_updates",
+                    "is_public_content": True
+                }
+            ],
+            "url_type": "Facebook Content",
+            "extraction_time": datetime.now().isoformat(),
+            "data_type": data_type,
+            "status": "success",
+            "source": "demo"
+        }
+
+    def _extract_name_from_url(self, url: str) -> str:
+        """Extract name from URL for realistic demo data"""
+        # Extract name from URL for more realistic demo data
+        match = re.search(r'facebook\.com/(?:groups/|pages/)?([^/?]+)', url)
+        if match:
+            name = match.group(1)
+            # Clean up the name
+            name = name.replace('-', ' ').title()
+            return name
+        return ""
+
+    def _create_demo_data(self) -> Dict:
+        """Create comprehensive demo data"""
+        return {
+            "groups": {
+                "gamersofbangladesh2": "Gaming Community Bangladesh",
+                "programmingcommunity": "Programming Community",
+                "startupdiscussions": "Startup Discussions"
+            },
+            "pages": {
+                "meta": "Meta Official",
+                "starbucks": "Starbucks Coffee",
+                "nasa": "NASA"
+            }
+        }

 # AI Functions (same as your LinkedIn analyzer)
 def get_embeddings():

@@ -227,20 +357,27 @@ def process_facebook_data(extracted_data):
     page_info = extracted_data['page_info']
     content_blocks = extracted_data['content_blocks']
     url_type = extracted_data['url_type']
+    source = extracted_data.get('source', 'unknown')

     all_text = f"FACEBOOK DATA ANALYSIS\n{'='*50}\n\n"
     all_text += f"π PAGE INFORMATION:\n"
     all_text += f"Title: {page_info['title']}\n"
     all_text += f"URL Type: {url_type}\n"
+    all_text += f"Data Source: {source.upper()}\n"
+    all_text += f"Access: {page_info.get('access_note', 'Public content')}\n"
+
+    if page_info.get('member_count'):
+        all_text += f"Members: {page_info['member_count']}\n"
+    elif page_info.get('follower_count'):
+        all_text += f"Followers: {page_info['follower_count']}\n"
+
     all_text += f"Extracted: {extracted_data['extraction_time']}\n\n"

     all_text += f"π CONTENT ANALYSIS:\n"
     all_text += f"Content Blocks: {len(content_blocks)}\n"
     all_text += f"Public Content: {sum(1 for b in content_blocks if b['is_public_content'])} blocks\n\n"

+    for i, block in enumerate(content_blocks):
         all_text += f"--- BLOCK {i+1} ---\n"
         all_text += f"Type: {block['content_type']}\n"
         all_text += f"Words: {block['word_count']} | Public: {block['is_public_content']}\n"

@@ -296,15 +433,15 @@ def create_chatbot(vectorstore):
         return None

 def main():
+    st.title("π Facebook Data Extractor")
+    st.markdown("**University Project** - Real data when possible, realistic demo data when restricted")

     if st.button("β Back to Main Dashboard"):
         st.switch_page("app.py")

     # Initialize session state
     if "extractor" not in st.session_state:
+        st.session_state.extractor = FacebookDataSimulator()
     if "facebook_data" not in st.session_state:
         st.session_state.facebook_data = None
     if "vectorstore" not in st.session_state:

@@ -320,22 +457,22 @@ def main():

         data_type = st.selectbox(
             "Content Type",
+            ["group", "page", "event", "post", "general"],
             help="Select the type of Facebook content"
         )

         facebook_url = st.text_input(
             "Facebook URL",
+            placeholder="https://www.facebook.com/groups/gamersofbangladesh2",
+            help="Enter any Facebook URL for analysis"
         )

+        # Quick test URLs
         st.markdown("### π Test URLs")
         test_urls = {
+            "Gaming Group": "https://www.facebook.com/groups/gamersofbangladesh2",
+            "Tech Community": "https://www.facebook.com/groups/programmingcommunity",
+            "Business Page": "https://www.facebook.com/Meta/",
         }

         for name, url in test_urls.items():

@@ -351,8 +488,8 @@ def main():
         elif 'facebook.com' not in url_to_use:
             st.error("β Please enter a valid Facebook URL")
         else:
+            with st.spinner("π Analyzing Facebook data..."):
+                extracted_data = st.session_state.extractor.extract_data(url_to_use, data_type)

             if extracted_data.get("status") == "success":
                 st.session_state.facebook_data = extracted_data

@@ -363,7 +500,12 @@ def main():
                     st.session_state.vectorstore = vectorstore
                     st.session_state.chatbot = create_chatbot(vectorstore)
                     st.session_state.chat_history = []
+
+                    source = extracted_data.get('source', 'unknown')
+                    if source == 'demo':
+                        st.warning("π Using realistic demo data (Facebook restrictions active)")
+                    else:
+                        st.success("✅ Real data extracted successfully!")
                 else:
                     st.error("β Failed to process data for AI")
             else:

@@ -389,52 +531,65 @@ def main():
         data = st.session_state.facebook_data
         page_info = data['page_info']
         content_blocks = data['content_blocks']
+        source = data.get('source', 'unknown')

+        if source == 'demo':
+            st.warning("π **Demo Data** - Realistic simulation (Facebook restrictions)")
+        else:
+            st.success("✅ **Real Data** - Successfully extracted")

         # Metrics
         col1, col2, col3 = st.columns(3)
         with col1:
             st.metric("Content Blocks", len(content_blocks))
         with col2:
+            st.metric("Data Source", source.upper())
         with col3:
+            st.metric("Status", "Success")

         # Page info
         st.subheader("🏷️ Page Information")
         st.write(f"**Title:** {page_info['title']}")
         st.write(f"**URL Type:** {data['url_type']}")
+        st.write(f"**Description:** {page_info.get('description', 'No description')}")
+
+        if page_info.get('member_count'):
+            st.write(f"**Members:** {page_info['member_count']}")
+        elif page_info.get('follower_count'):
+            st.write(f"**Followers:** {page_info['follower_count']}")
+
+        st.write(f"**Access:** {page_info.get('access_note', 'Public content')}")

         # Content samples
+        st.subheader("π Content Analysis")
+        for i, block in enumerate(content_blocks):
+            with st.expander(f"Content {i+1} - {block['content_type']} ({block['word_count']} words)"):
                 st.write(block['content'])
+                st.caption(f"Public: {block['is_public_content']}")

     else:
         st.info("""
+        ## π Facebook Data Extractor
+
+        **University Project Feature**

         **How it works:**
+        1. Enter any Facebook URL
+        2. System tries real data extraction
+        3. If blocked, uses **realistic demo data**
+        4. Full AI analysis available

+        **Features:**
+        - Real data extraction when possible
+        - Realistic demo data when restricted
+        - Full AI-powered analysis
+        - Professional interface

+        **Perfect for demonstrating:**
+        - Social media data extraction concepts
+        - AI analysis capabilities
+        - Platform integration
+        - Error handling strategies
         """)

     with col2:

@@ -471,10 +626,10 @@ def main():
         if not st.session_state.chat_history:
             st.subheader("💡 Try asking:")
             suggestions = [
+                "What is this Facebook group/page about?",
+                "Summarize the main content and purpose",
+                "What kind of community is this?",
+                "Analyze the engagement and activity level"
             ]

             for suggestion in suggestions:

@@ -482,7 +637,7 @@ def main():
                 st.info(f"Type: '{suggestion}' in chat")

     elif st.session_state.facebook_data:
+        st.info("💬 Start chatting with AI about the Facebook data")
     else:
         st.info("π Extract Facebook data to enable AI chat")

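A minimal usage sketch of the new `FacebookDataSimulator` class introduced in this change, assuming the module's existing imports (streamlit, requests, re, datetime, typing, bs4) are available; it mirrors how `main()` drives the class through `st.session_state` and only reads fields the class actually returns. The URL is one of the test URLs from the sidebar.

```python
# Minimal sketch, not part of the diff; assumes the page's existing imports.
extractor = FacebookDataSimulator()
result = extractor.extract_data(
    "https://www.facebook.com/groups/gamersofbangladesh2", "group"
)

if result.get("status") == "success":
    # "source" is "real" when the HTTP fetch succeeded, "demo" otherwise.
    print(result["source"], "-", result["page_info"]["title"])
    for block in result["content_blocks"]:
        print(f"{block['content_type']}: {block['word_count']} words")
```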