Spaces:

NimaKL
/

LetsTalk

Runtime error

App Files Files Community

NimaKL committed on May 27, 2025

Commit

cba807f

verified ·

1 Parent(s): cffd762

Update app.py

Browse files

Files changed (1) hide show

app.py +732 -224

app.py CHANGED Viewed

@@ -4,10 +4,13 @@ import logging
 from typing import List, Dict, Tuple
 import pandas as pd
 from datetime import datetime
-import os
-# Set up logging
-logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 # Get Neo4j credentials from Hugging Face secrets
@@ -18,10 +21,13 @@ NEO4J_PASSWORD = os.environ['NEO4J_PASSWORD']
 def format_neo4j_datetime(dt) -> str:
     """Convert Neo4j datetime to string format."""
     if dt is None:
         return 'Unknown date'
     try:
         if hasattr(dt, 'to_native'):
             dt = dt.to_native()
         return dt.strftime('%Y-%m-%d')
     except Exception as e:
         logger.warning(f"Error formatting datetime: {e}")
@@ -38,23 +44,177 @@ def format_interest_list(interests: set, max_items: int = 10) -> str:
 class QuestionRecommender:
     def __init__(self):
-        self.driver = GraphDatabase.driver(
-            NEO4J_URL,
-            auth=(NEO4J_USER, NEO4J_PASSWORD)
-        )
     def close(self):
         self.driver.close()
     def get_all_users(self) -> List[str]:
-        """Get list of all users."""
         with self.driver.session() as session:
-            result = session.run("""
-                MATCH (u:User)
-                RETURN DISTINCT u.name as username
-                ORDER BY username
-            """)
-            return [record["username"] for record in result if record["username"]]
     def get_user_interests(self, username: str) -> Dict[str, set]:
         """Get keywords and topics a user is interested in."""
@@ -65,6 +225,9 @@ class QuestionRecommender:
                 RETURN DISTINCT k.keyword as keyword
             """, username=username)
             keywords = {str(record["keyword"]) for record in keyword_result if record["keyword"]}
             # Get topics the user is interested in
             topic_result = session.run("""
@@ -72,139 +235,464 @@ class QuestionRecommender:
                 RETURN DISTINCT t.topic as topic
             """, username=username)
             topics = {str(record["topic"]) for record in topic_result if record["topic"]}
             return {"keywords": keywords or set(), "topics": topics or set()}
     def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
-        """Find questions to recommend based on common interests."""
         with self.driver.session() as session:
-            # First try to find questions with common keywords, but be more selective
-            keyword_questions = session.run("""
-                // Find keywords that both users are interested in
-                MATCH (u1:User {name: $user1})-[:INTERESTED_IN_KEYWORD]->(k:Keyword)<-[:INTERESTED_IN_KEYWORD]-(u2:User {name: $user2})
-                // Calculate how specific each keyword is based on total user interest
-                MATCH (anyUser:User)-[:INTERESTED_IN_KEYWORD]->(k)
-                WITH k, COUNT(anyUser) as keyword_popularity
-                WHERE keyword_popularity < 1000  // Filter out extremely common keywords
-                // Find questions with these more specific common keywords
-                MATCH (q:Question)-[:HAS_KEYWORD]->(k)
-                WHERE q.author <> $user1 AND q.author <> $user2
-                // Group questions and calculate relevance
-                WITH q, k, 1.0/keyword_popularity as keyword_specificity
-                WITH q,
-                     COLLECT(DISTINCT k.keyword) as matching_keywords,
-                     SUM(keyword_specificity) as relevance_score
-                // Ensure we have enough matching keywords but not too many
-                WHERE SIZE(matching_keywords) >= 2 AND SIZE(matching_keywords) <= 5
-                // Return questions with their details
-                RETURN DISTINCT
-                    q.title as title,
-                    q.body as body,
-                    q.created_utc_ts as created_date,
-                    q.author as author,
-                    matching_keywords as keywords,
-                    relevance_score
-                ORDER BY relevance_score DESC, q.created_utc_ts DESC
-                LIMIT 25
-            """, user1=user1, user2=user2)
-            questions = [dict(record) for record in keyword_questions]
-            # If no questions found with common keywords, try topics
-            if not questions:
-                topic_questions = session.run("""
-                    // Find topics that both users are interested in
-                    MATCH (u1:User {name: $user1})-[:INTERESTED_IN_TOPIC]->(t:Topic)<-[:INTERESTED_IN_TOPIC]-(u2:User {name: $user2})
-                    WITH DISTINCT t
-                    // Find questions with these common topics
-                    MATCH (q:Question)-[:HAS_TOPIC]->(t)
-                    WHERE q.author <> $user1 AND q.author <> $user2
-                    // Group questions and calculate relevance
-                    WITH q, COLLECT(DISTINCT t.topic) as matching_topics
-                    WITH q, matching_topics, SIZE(matching_topics) as relevance_score
-                    // Return questions with their details
-                    RETURN DISTINCT
-                        q.title as title,
-                        q.body as body,
-                        q.created_utc_ts as created_date,
-                        q.author as author,
-                        matching_topics as topics,
-                        relevance_score
-                    ORDER BY relevance_score DESC, q.created_utc_ts DESC
-                    LIMIT $limit
-                """, user1=user1, user2=user2, limit=max_questions)
-                questions = [dict(record) for record in topic_questions]
-            # Post-process to remove duplicate/similar questions and ensure diversity
-            seen_titles = set()
-            filtered_questions = []
-            for q in questions:
-                # Create a simplified version of the title for comparison
-                simple_title = q['title'].lower().strip()
-                # Skip if we've seen a very similar title
-                if any(self._titles_are_similar(simple_title, seen) for seen in seen_titles):
-                    continue
-                seen_titles.add(simple_title)
-                filtered_questions.append(q)
-                if len(filtered_questions) >= max_questions:
-                    break
-            return filtered_questions
-    def _titles_are_similar(self, title1: str, title2: str, similarity_threshold: float = 0.8) -> bool:
-        """Check if two titles are very similar to avoid recommending duplicate questions."""
-        # Remove common punctuation and convert to set of words
-        words1 = set(title1.replace('?', '').replace('!', '').replace('.', '').split())
-        words2 = set(title2.replace('?', '').replace('!', '').replace('.', '').split())
-        # Calculate Jaccard similarity
-        intersection = len(words1 & words2)
-        union = len(words1 | words2)
-        if union == 0:
-            return False
-        return intersection / union >= similarity_threshold
 def format_question(q: Dict) -> str:
-    """Format a question for display."""
-    created_date = format_neo4j_datetime(q.get('created_date'))
-    keywords_or_topics = q.get('keywords', q.get('topics', []))
-    interests = format_interest_list(set(k for k in keywords_or_topics if k is not None), max_items=5)
-    author = q.get('author', 'Unknown author')
-    title = q.get('title', 'Untitled')
-    body = q.get('body', '')
-    # Only show body section if there's actual content
-    body_html = f"""
-    <div class="question-body">
-        {body[:300] + "... [truncated]" if body and len(body) > 300 else body}
-    </div>
-    """ if body else ""
-    return f"""
-<div class="question-card">
-    <h3>{title}</h3>
-    <div class="question-meta">
-        Posted by <span class="author">{author}</span> on <span class="date">{created_date}</span>
-    </div>
-    <div class="interests">
-        Common Interests: <span class="interest-tags">{interests}</span>
-    </div>{body_html}
-</div>
-"""
 def loading_message() -> Tuple[str, str, str]:
     """Return loading message in proper HTML format."""
@@ -218,8 +706,12 @@ def loading_message() -> Tuple[str, str, str]:
     """
     return loading_html, loading_html, loading_html
-def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str]:
     """Main function to get recommendations and user interests."""
     recommender = QuestionRecommender()
     try:
         # Get interests for both users
@@ -232,62 +724,61 @@ def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str]:
         # Format interests summary
         interests_summary = f"""
-<div class="interests-summary">
-    <div class="user-interests">
-        <h3>{user1}'s Interests</h3>
-        <div class="interest-section">
-            <strong>Keywords:</strong> {format_interest_list(user1_interests['keywords'], max_items=8)}
-        </div>
-        <div class="interest-section">
-            <strong>Topics:</strong> {format_interest_list(user1_interests['topics'], max_items=5)}
-        </div>
-    </div>
-    <div class="user-interests">
-        <h3>{user2}'s Interests</h3>
-        <div class="interest-section">
-            <strong>Keywords:</strong> {format_interest_list(user2_interests['keywords'], max_items=8)}
-        </div>
-        <div class="interest-section">
-            <strong>Topics:</strong> {format_interest_list(user2_interests['topics'], max_items=5)}
-        </div>
-    </div>
-    <div class="common-interests">
-        <h3>Common Interests</h3>
-        <div class="interest-section">
-            <strong>Keywords:</strong> {format_interest_list(common_keywords, max_items=8)}
-        </div>
-        <div class="interest-section">
-            <strong>Topics:</strong> {format_interest_list(common_topics, max_items=5)}
         </div>
-    </div>
-</div>
-"""
-        # Get recommended questions
-        questions = recommender.find_common_questions(user1, user2)
         if questions:
-            questions_text = """<div class="questions-container">\n""" + \
-                           """\n""".join(format_question(q) for q in questions) + \
-                           """\n</div>"""
-            recommendation_type = """<h2 class="recommendation-header">""" + \
-                                ("Recommendations Based on Common Keywords" if 'keywords' in questions[0]
-                                 else "Recommendations Based on Common Topics") + \
-                                """</h2>"""
         else:
-            questions_text = """<div class="no-questions">No questions found based on common interests.</div>"""
-            recommendation_type = """<h2 class="recommendation-header">No Recommendations Available</h2>"""
-        return interests_summary, recommendation_type, questions_text
     except Exception as e:
         logger.error(f"Error in recommend_questions: {str(e)}")
         return (
-            """<div class="error">Error fetching user interests. Please try again.</div>""",
-            """<h2 class="error-header">Error</h2>""",
-            f"""<div class="error-message">An error occurred: {str(e)}</div>"""
         )
     finally:
         recommender.close()
@@ -469,54 +960,71 @@ strong {
 }
 """
-# Create Gradio interface
-recommender = QuestionRecommender()
-users = recommender.get_all_users()
-recommender.close()
-with gr.Blocks(title="Question Recommender", theme=gr.themes.Soft(), css=custom_css) as iface:
-    gr.Markdown("""
-    # 🤝 Question Recommender
-    Find questions that two users might be interested in discussing together based on their common interests.
-    """)
-    with gr.Row(equal_height=True):
-        with gr.Column(scale=1):
-            user1_dropdown = gr.Dropdown(
-                choices=users,
-                label="👤 First User",
-                interactive=True,
-                max_choices=None
-            )
-        with gr.Column(scale=1):
-            user2_dropdown = gr.Dropdown(
-                choices=users,
-                label="👤 Second User",
-                interactive=True,
-                max_choices=None
-            )
-    recommend_btn = gr.Button(
-        "🔍 Get Recommendations",
-        variant="primary",
-        size="lg"
-    )
-    with gr.Row():
-        interests_output = gr.HTML(label="Common Interests")
-    recommendation_type = gr.HTML()
-    questions_output = gr.HTML()
-    # Add loading state
-    recommend_btn.click(
-        fn=loading_message,  # First show loading message
-        outputs=[interests_output, recommendation_type, questions_output],
-        queue=False  # Don't queue this call
-    ).then(  # Then get the actual recommendations
-        fn=recommend_questions,
-        inputs=[user1_dropdown, user2_dropdown],
-        outputs=[interests_output, recommendation_type, questions_output]
     )
-iface.launch()

+import os
 from typing import List, Dict, Tuple
 import pandas as pd
 from datetime import datetime
+# Set up logging with more detailed format for debugging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S'
+)
 logger = logging.getLogger(__name__)
 # Get Neo4j credentials from Hugging Face secrets
 def format_neo4j_datetime(dt) -> str:
     """Convert Neo4j datetime to string format."""
     if dt is None:
+        logger.info("Received None datetime")
         return 'Unknown date'
     try:
+        logger.info(f"Formatting datetime: {dt} of type {type(dt)}")
         if hasattr(dt, 'to_native'):
             dt = dt.to_native()
+            logger.info(f"Converted to native: {dt} of type {type(dt)}")
         return dt.strftime('%Y-%m-%d')
     except Exception as e:
         logger.warning(f"Error formatting datetime: {e}")
 class QuestionRecommender:
     def __init__(self):
+        """Open the Neo4j driver and run the start-up diagnostics.
+
+        Creates the driver from NEO4J_URL / NEO4J_USER / NEO4J_PASSWORD, checks
+        connectivity, then runs verify_connection() and
+        inspect_question_types(), which log database statistics.
+
+        Raises:
+            Exception: whatever the driver or a diagnostic query raised, after
+                it has been logged and the driver (if created) has been closed.
+        """
+        # Assigned up front so the failure path can tell whether a driver exists.
+        self.driver = None
+        try:
+            self.driver = GraphDatabase.driver(
+                NEO4J_URL,
+                auth=(NEO4J_USER, NEO4J_PASSWORD)
+            )
+            logger.info("Initializing QuestionRecommender with debug database")
+            # Test connection immediately
+            self.driver.verify_connectivity()
+            logger.info("Successfully connected to Neo4j database")
+            # NOTE(review): recommend_questions() builds a new instance per call,
+            # so these diagnostic queries appear to run on every request --
+            # confirm that is intended.
+            self.verify_connection()
+            # Inspect question types on initialization
+            self.inspect_question_types()
+        except Exception as e:
+            logger.error(f"Failed to initialize database connection: {str(e)}")
+            # The caller never receives this instance, so nothing else can
+            # release the driver if construction fails part-way through.
+            if self.driver is not None:
+                try:
+                    self.driver.close()
+                except Exception as close_error:
+                    logger.warning(f"Error closing Neo4j driver after failed initialization: {close_error}")
+            raise
+    def verify_connection(self):
+        """Verify database connection and log basic statistics.
+
+        Runs a node-count test query, then one statistics query that counts
+        User / Keyword / Question / Topic nodes and the four interest
+        relationship types, and logs the results at INFO level.
+
+        Raises:
+            Exception: if either query fails or returns no row; the original
+                error is chained as the cause.
+        """
+        try:
+            with self.driver.session() as session:
+                # First try a simple query to verify connection
+                test_result = session.run("MATCH (n) RETURN count(n) as count").single()
+                if not test_result:
+                    raise Exception("Could not execute test query")
+                logger.info(f"Database contains {test_result['count']} total nodes")
+                # Get database statistics with relationship counts.
+                # Every step uses OPTIONAL MATCH: a plain MATCH on a label with
+                # no nodes yields zero rows, and because the earlier counts are
+                # grouping keys the whole query would then return nothing
+                # instead of reporting a count of 0.
+                stats = session.run("""
+                    // Count nodes
+                    OPTIONAL MATCH (u:User)
+                    WITH COUNT(u) as user_count
+                    OPTIONAL MATCH (k:Keyword)
+                    WITH user_count, COUNT(k) as keyword_count
+                    OPTIONAL MATCH (q:Question)
+                    WITH user_count, keyword_count, COUNT(q) as question_count
+                    OPTIONAL MATCH (t:Topic)
+                    WITH user_count, keyword_count, question_count, COUNT(t) as topic_count
+                    // Count relationships
+                    OPTIONAL MATCH ()-[r:INTERESTED_IN_KEYWORD]->()
+                    WITH user_count, keyword_count, question_count, topic_count, COUNT(r) as keyword_rel_count
+                    OPTIONAL MATCH ()-[r:INTERESTED_IN_TOPIC]->()
+                    WITH user_count, keyword_count, question_count, topic_count, keyword_rel_count, COUNT(r) as topic_rel_count
+                    OPTIONAL MATCH ()-[r:HAS_KEYWORD]->()
+                    WITH user_count, keyword_count, question_count, topic_count, keyword_rel_count, topic_rel_count, COUNT(r) as question_keyword_count
+                    OPTIONAL MATCH ()-[r:HAS_TOPIC]->()
+                    RETURN
+                        user_count, keyword_count, question_count, topic_count,
+                        keyword_rel_count, topic_rel_count,
+                        question_keyword_count, COUNT(r) as question_topic_count
+                """).single()
+                if not stats:
+                    raise Exception("Could not retrieve database statistics")
+                logger.info("=== Database Statistics ===")
+                logger.info("Nodes:")
+                logger.info(f"  Users: {stats['user_count']}")
+                logger.info(f"  Keywords: {stats['keyword_count']}")
+                logger.info(f"  Questions: {stats['question_count']}")
+                logger.info(f"  Topics: {stats['topic_count']}")
+                logger.info("\nRelationships:")
+                logger.info(f"  User->Keyword (INTERESTED_IN_KEYWORD): {stats['keyword_rel_count']}")
+                logger.info(f"  User->Topic (INTERESTED_IN_TOPIC): {stats['topic_rel_count']}")
+                logger.info(f"  Question->Keyword (HAS_KEYWORD): {stats['question_keyword_count']}")
+                logger.info(f"  Question->Topic (HAS_TOPIC): {stats['question_topic_count']}")
+        except Exception as e:
+            logger.error(f"Database verification failed: {str(e)}")
+            logger.error(f"URL: {NEO4J_URL}")
+            logger.error(f"User: {NEO4J_USER}")
+            # Chain the cause so the original traceback is not lost.
+            raise Exception(f"Failed to verify database connection: {str(e)}") from e
+    def inspect_question_types(self):
+        """Inspect different types of questions and their attributes in the database.
+
+        Logs, at INFO level, each distinct (labels, property keys) combination
+        found on Question nodes with the number of nodes sharing it and one
+        sample value per property, then each outgoing relationship type with
+        its target labels and count.
+
+        Raises:
+            Exception: any query error is logged and re-raised unchanged.
+        """
+        with self.driver.session() as session:
+            try:
+                # Group Question nodes by label set and property-key list.
+                # No DISTINCT before the aggregation: de-duplicating first
+                # would leave one row per group and make every count 1.
+                result = session.run("""
+                    MATCH (q:Question)
+                    RETURN labels(q) as types, keys(q) as props, count(*) as count
+                    ORDER BY count DESC
+                """)
+                logger.info("\n=== Question Types and Properties ===")
+                for record in result:
+                    types = record["types"]
+                    props = record["props"]
+                    count = record["count"]
+                    logger.info(f"\nType: {types}")
+                    logger.info(f"Count: {count}")
+                    logger.info("Properties:")
+                    for prop in props:
+                        # Get a sample value for this property
+                        sample = session.run("""
+                            MATCH (q:Question)
+                            WHERE $prop in keys(q)
+                            RETURN q[$prop] as value
+                            LIMIT 1
+                        """, prop=prop).single()
+                        value = sample["value"] if sample else None
+                        value_type = type(value).__name__ if value is not None else "None"
+                        # Show at most 100 characters, with an ellipsis when cut.
+                        logger.info(f"  - {prop}: {value_type} (example: {str(value)[:100]}{'...' if str(value)[100:] else ''})")
+                # Get relationships specific to different question types
+                result = session.run("""
+                    MATCH (q:Question)-[r]->(target)
+                    WITH DISTINCT type(r) as rel_type, labels(target) as target_labels, count(*) as count
+                    RETURN rel_type, target_labels, count
+                    ORDER BY count DESC
+                """)
+                logger.info("\n=== Question Relationships ===")
+                for record in result:
+                    rel_type = record["rel_type"]
+                    target_labels = record["target_labels"]
+                    count = record["count"]
+                    logger.info(f"Relationship: {rel_type} -> {target_labels} (Count: {count})")
+            except Exception as e:
+                logger.error(f"Error inspecting question types: {str(e)}")
+                raise
     def close(self):
+        """Close the Neo4j driver held in ``self.driver``."""
         self.driver.close()
     def get_all_users(self) -> List[str]:
+        """Return display labels for every user that has at least one interest.
+
+        Each label has the form ``"<name> (<n> keywords, <m> topics)"``. Labels
+        are ordered by total interest count (descending), then by name. An
+        empty list is returned when no such user exists or when the query
+        fails; the failure is logged, not raised.
+        """
         with self.driver.session() as session:
+            try:
+                # Count distinct keyword and topic interests per user
+                records = session.run("""
+                    MATCH (u:User)
+                    OPTIONAL MATCH (u)-[r:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest)
+                    WITH u,
+                         COUNT(DISTINCT CASE WHEN type(r) = 'INTERESTED_IN_KEYWORD' THEN interest END) as keyword_count,
+                         COUNT(DISTINCT CASE WHEN type(r) = 'INTERESTED_IN_TOPIC' THEN interest END) as topic_count
+                    WHERE keyword_count > 0 OR topic_count > 0
+                    RETURN
+                        u.name as username,
+                        keyword_count,
+                        topic_count,
+                        keyword_count + topic_count as total_interests
+                    ORDER BY total_interests DESC, username
+                """)
+                # Keep only rows that carry a non-empty user name
+                rows = []
+                for rec in records:
+                    if rec["username"]:
+                        rows.append((rec["username"], rec["keyword_count"], rec["topic_count"]))
+                if rows:
+                    logger.info(f"Retrieved {len(rows)} users with interests")
+                    logger.info("Top 5 users by interest count:")
+                    for name, n_keywords, n_topics in rows[:5]:
+                        logger.info(f"  - {name}: {n_keywords} keywords, {n_topics} topics")
+                    # Build the labels shown to the user, counts included
+                    labels = []
+                    for name, n_keywords, n_topics in rows:
+                        labels.append(f"{name} ({n_keywords} keywords, {n_topics} topics)")
+                    return labels
+                logger.warning("No users found with interests")
+                return []
+            except Exception as e:
+                logger.error(f"Error fetching users: {str(e)}")
+                return []
     def get_user_interests(self, username: str) -> Dict[str, set]:
         """Get keywords and topics a user is interested in."""
                 RETURN DISTINCT k.keyword as keyword
             """, username=username)
             keywords = {str(record["keyword"]) for record in keyword_result if record["keyword"]}
+            # Log keyword count for debugging
+            logger.debug(f"Found {len(keywords)} keywords for user {username}")
             # Get topics the user is interested in
             topic_result = session.run("""
                 RETURN DISTINCT t.topic as topic
             """, username=username)
             topics = {str(record["topic"]) for record in topic_result if record["topic"]}
+            # Log topic count for debugging
+            logger.debug(f"Found {len(topics)} topics for user {username}")
             return {"keywords": keywords or set(), "topics": topics or set()}
     def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
+        """Find questions to recommend based on the two users' interests.
+
+        Candidates are drawn per source ('stack_exchange', 'trivia',
+        'wikipedia', 'reddit'), at most 15 each. An interest shared by both
+        users weighs 2.0, any other matching interest 1.0. The score gets a
+        recency boost of up to 10% that fades linearly to zero over 365 days,
+        plus random jitter, so results differ between calls.
+
+        Args:
+            user1: ``name`` property of the first User node.
+            user2: ``name`` property of the second User node.
+            max_questions: maximum number of questions to return.
+
+        Returns:
+            A list of dicts with the keys title, body, created_utc_ts, author,
+            source, correct_answer, incorrect_answers, upvotes, num_comments,
+            subreddit, matching_interests, interest_types and relevance_score,
+            best score first. Empty when either user does not exist.
+        """
         with self.driver.session() as session:
+            # Debug: Check if users exist and have interests.
+            # Each user is aggregated on its own: two consecutive OPTIONAL
+            # MATCHes would build an interests1 x interests2 cross product
+            # just to count each side. The first WITH has no grouping key, so
+            # the query returns exactly one row even when a user is missing.
+            user_check = session.run("""
+                OPTIONAL MATCH (u1:User {name: $user1})
+                OPTIONAL MATCH (u1)-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest1)
+                WITH COUNT(DISTINCT u1) as user1_exists,
+                     COUNT(DISTINCT interest1) as user1_interests
+                OPTIONAL MATCH (u2:User {name: $user2})
+                OPTIONAL MATCH (u2)-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest2)
+                RETURN
+                    user1_exists,
+                    COUNT(DISTINCT u2) as user2_exists,
+                    user1_interests,
+                    COUNT(DISTINCT interest2) as user2_interests
+            """, user1=user1, user2=user2).single()
+            if not (user_check and user_check['user1_exists'] and user_check['user2_exists']):
+                logger.error(f"One or both users not found: {user1}, {user2}")
+                return []
+            logger.info(f"User {user1} has {user_check['user1_interests']} total interests")
+            logger.info(f"User {user2} has {user_check['user2_interests']} total interests")
+            # Advanced question recommendation query using Neo4j path finding and scoring
+            questions_query = """
+            // Find all interests (both keywords and topics) for both users
+            MATCH (u1:User {name: $user1})
+            MATCH (u2:User {name: $user2})
+            // Collect each user's interests separately to avoid a cross product
+            OPTIONAL MATCH (u1)-[r1:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest1)
+            WITH u1, u2,
+                 COLLECT(DISTINCT interest1) as u1_interests
+            OPTIONAL MATCH (u2)-[r2:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest2)
+            WITH u1, u2, u1_interests,
+                 COLLECT(DISTINCT interest2) as u2_interests
+            // Find questions related to either user's interests for each source
+            CALL {
+                WITH u1, u2, u1_interests, u2_interests
+                UNWIND u1_interests + u2_interests as interest
+                MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
+                WHERE
+                    q.author <> $user1 AND
+                    q.author <> $user2 AND
+                    q.source = 'stack_exchange' AND
+                    (
+                        (interest IN u1_interests AND interest IN u2_interests) OR
+                        (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
+                        (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
+                    )
+                WITH q, interest, type(r) as rel_type,
+                     CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
+                WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
+                     sum(interest_weight) as base_score
+                RETURN q, interests, base_score
+                ORDER BY base_score * rand() DESC
+                LIMIT 15 // Increased from 10 to get more variety
+                UNION
+                WITH u1, u2, u1_interests, u2_interests
+                UNWIND u1_interests + u2_interests as interest
+                MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
+                WHERE
+                    q.source = 'trivia' AND
+                    (
+                        (interest IN u1_interests AND interest IN u2_interests) OR
+                        (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
+                        (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
+                    )
+                WITH q, interest, type(r) as rel_type,
+                     CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
+                WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
+                     sum(interest_weight) as base_score
+                RETURN q, interests, base_score
+                ORDER BY base_score * rand() DESC
+                LIMIT 15 // Increased from 10 to get more variety
+                UNION
+                WITH u1, u2, u1_interests, u2_interests
+                UNWIND u1_interests + u2_interests as interest
+                MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
+                WHERE
+                    q.source = 'wikipedia' AND
+                    (
+                        (interest IN u1_interests AND interest IN u2_interests) OR
+                        (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
+                        (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
+                    )
+                WITH q, interest, type(r) as rel_type,
+                     CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
+                WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
+                     sum(interest_weight) as base_score
+                RETURN q, interests, base_score
+                ORDER BY base_score * rand() DESC
+                LIMIT 15 // Increased from 10 to get more variety
+                UNION
+                WITH u1, u2, u1_interests, u2_interests
+                UNWIND u1_interests + u2_interests as interest
+                MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
+                WHERE
+                    q.source = 'reddit' AND
+                    (
+                        (interest IN u1_interests AND interest IN u2_interests) OR
+                        (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
+                        (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
+                    )
+                WITH q, interest, type(r) as rel_type,
+                     CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
+                WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
+                     sum(interest_weight) as base_score
+                RETURN q, interests, base_score
+                ORDER BY base_score * rand() DESC
+                LIMIT 15 // Increased from 10 to get more variety
+            }
+            // Age of the question in whole days (null without a timestamp).
+            // duration.inDays() is required here: duration.between(...).days is
+            // only the day component and ignores whole months and years.
+            WITH q, interests, base_score,
+                 CASE
+                     WHEN q.created_utc_ts IS NOT NULL
+                     THEN duration.inDays(q.created_utc_ts, datetime()).days
+                     ELSE null
+                 END as age_days
+            // Calculate temporal relevance for the combined results; the boost
+            // is clamped to [0%, 10%] so old questions never score negative
+            WITH q, interests, base_score,
+                 CASE
+                     WHEN age_days IS NULL THEN base_score
+                     WHEN age_days >= 365 THEN base_score
+                     WHEN age_days <= 0 THEN base_score * 1.1
+                     ELSE base_score * (1.0 + 0.1 * (1.0 - age_days / 365.0))
+                 END as temporal_score,
+                 // Add source-specific random boost to ensure better mixing
+                 CASE q.source
+                     WHEN 'stack_exchange' THEN rand() * 0.4
+                     WHEN 'trivia' THEN rand() * 0.4
+                     WHEN 'wikipedia' THEN rand() * 0.4
+                     WHEN 'reddit' THEN rand() * 0.4
+                     ELSE rand() * 0.4
+                 END as source_random_boost
+            // Return results with all metadata
+            WITH q, interests, temporal_score, source_random_boost,
+                 temporal_score * (0.6 + 0.8 * rand()) + source_random_boost as final_score
+            RETURN DISTINCT
+                q.title as title,
+                q.body as body,
+                q.created_utc_ts as created_utc_ts,
+                q.author as author,
+                q.source as source,
+                q.correct_answer as correct_answer,
+                q.incorrect_answers as incorrect_answers,
+                q.upvotes as upvotes,
+                q.num_comments as num_comments,
+                q.subreddit as subreddit,
+                [i in interests | CASE
+                    WHEN i.type = 'HAS_KEYWORD' THEN i.interest.keyword
+                    ELSE i.interest.topic
+                END] as matching_interests,
+                [i in interests | CASE
+                    WHEN i.type = 'HAS_KEYWORD' THEN 'keyword'
+                    ELSE 'topic'
+                END] as interest_types,
+                final_score as relevance_score
+            ORDER BY final_score DESC
+            LIMIT $max_questions
+            """
+            questions = [dict(record) for record in session.run(questions_query,
+                                                              user1=user1,
+                                                              user2=user2,
+                                                              max_questions=max_questions)]
+            if questions:
+                # Log the top-ranked result as a sanity check
+                first_q = questions[0]
+                logger.info("Sample question:")
+                logger.info(f"Title: {first_q.get('title', 'No title')}")
+                logger.info(f"Author: {first_q.get('author', 'No author')}")
+                logger.info(f"Score: {first_q.get('relevance_score', 0)}")
+                logger.info(f"Interests: {first_q.get('matching_interests', [])}")
+            logger.info(f"Found {len(questions)} questions with common interests")
+            return questions
+def process_body(text, title):
+    """Process question body to handle images and HTML."""
+    if not text:
+        logger.warning(f"Empty body for question: {title}")
+        return ""
+    try:
+        from bs4 import BeautifulSoup
+        # Parse the HTML content
+        soup = BeautifulSoup(str(text), 'html.parser')
+        # Function to fix Stack Exchange URLs
+        def fix_stack_exchange_url(url):
+            if not url:
+                return url
+            if url.startswith(('http://', 'https://')):
+                return url
+            if url.startswith('//'):
+                return 'https:' + url
+            if url.startswith('/'):
+                return 'https://i.stack.imgur.com' + url
+            return 'https://i.stack.imgur.com/' + url
+        # Find all img tags and replace with preview cards
+        for img in soup.find_all('img'):
+            src = img.get('src', '')
+            if not src:
+                continue
+            fixed_src = fix_stack_exchange_url(src)
+            alt_text = img.get('alt', '').strip()
+            if not alt_text or alt_text.lower() == 'enter image description here':
+                alt_text = 'Question image'
+            # Create an image preview card
+            preview_html = f"""
+            <div class="image-preview" style="margin: 10px 0; padding: 10px; background: rgba(30, 41, 59, 0.4); border-radius: 6px;">
+                <div style="display: flex; align-items: center; margin-bottom: 8px;">
+                    <span style="font-size: 20px; margin-right: 8px;">🖼️</span>
+                    <span style="color: #93c5fd;">{alt_text}</span>
+                </div>
+                <a href="{fixed_src}" target="_blank" rel="noopener noreferrer"
+                   style="color: #60a5fa; text-decoration: none;">View image</a>
+            </div>
+            """
+            new_soup = BeautifulSoup(preview_html, 'html.parser')
+            img.replace_with(new_soup)
+        # Style other elements
+        for link in soup.find_all('a'):
+            if 'View Image' not in (link.get_text() or ''):
+                href = link.get('href', '')
+                if href and not href.startswith(('http://', 'https://')):
+                    link['href'] = fix_stack_exchange_url(href)
+                link['target'] = '_blank'
+                link['rel'] = 'noopener noreferrer'
+                link['style'] = 'color: #60a5fa; text-decoration: none;'
+        # Add paragraph styling
+        for p in soup.find_all(['p', 'div']):
+            if not any(cls in (p.get('class', []) or []) for cls in ['image-preview', 'question-card']):
+                current_style = p.get('style', '')
+                p['style'] = f"{current_style}; margin: 0.8em 0; line-height: 1.6; color: #e2e8f0;"
+        # Add list styling
+        for ul in soup.find_all(['ul', 'ol']):
+            ul['style'] = 'margin: 0.8em 0; padding-left: 1.5em; color: #e2e8f0;'
+        for li in soup.find_all('li'):
+            li['style'] = 'margin: 0.4em 0; line-height: 1.6; color: #e2e8f0;'
+        # Add code block styling
+        for code in soup.find_all(['code', 'pre']):
+            code['style'] = 'background: rgba(30, 41, 59, 0.5); padding: 0.2em 0.4em; border-radius: 4px; font-family: monospace; color: #e2e8f0;'
+        return str(soup)
+    except Exception as e:
+        logger.error(f"Error processing question body: {str(e)}")
+        return str(text) if text else ""
 def format_question(q: Dict) -> str:
+    """Format a question for display based on its source."""
+    try:
+        # Extract and validate basic question data
+        title = q.get('title', 'Untitled')
+        source = q.get('source', '').lower()  # Convert to lowercase for consistent comparison
+        # Log available fields for debugging
+        logger.info(f"Question fields: {list(q.keys())}")
+        if 'created_utc_ts' in q:
+            logger.info(f"Raw created_utc_ts value: {q['created_utc_ts']}")
+        # Format metadata section based on source
+        metadata_html = ""
+        content_html = ""
+        # Default metadata for questions with author/date
+        if 'author' in q or 'created_utc_ts' in q:
+            author = q.get('author', 'Unknown author')
+            created_date = format_neo4j_datetime(q.get('created_utc_ts'))
+            logger.info(f"Question {title}: author={author}, date={created_date}")
+            upvotes = q.get('upvotes', 0)
+            num_comments = q.get('num_comments', 0)
+            metadata_html = f"""
+            <div class="question-meta" style="font-size: 0.9em; color: #cbd5e1; margin-bottom: 15px;">
+                <span style="color: #93c5fd; font-weight: 500;">{author}</span>
+                {' asked' if source == 'stack_exchange' else ' posted'} on
+                <span style="color: #94a3b8;">{created_date}</span>
+                <div class="stats" style="margin-top: 5px;">
+                    <span title="Upvotes"><span style="color: #93c5fd;">▲</span> {upvotes}</span>
+                    <span style="margin-left: 15px;" title="Comments"><span style="color: #93c5fd;">💬</span> {num_comments}</span>
+                </div>
+            </div>
+            """
+        # Handle content based on source and available fields
+        if source == "stack_exchange":
+            body = q.get('body', '')
+            if body:
+                content_html = f"""
+                <div class="question-content" style="margin-top: 20px; font-family: 'Segoe UI', system-ui, -apple-system, sans-serif; color: #e2e8f0; line-height: 1.6;">
+                    {process_body(body, title)}
+                </div>
+                """
+        elif source == "trivia":
+            correct_answer = q.get('correct_answer', '')
+            incorrect_answers = q.get('incorrect_answers', [])
+            # Create answer options HTML
+            answers = [correct_answer] + incorrect_answers if incorrect_answers else [correct_answer]
+            answers_html = "".join([
+                f"""
+                <div class="answer-option" style="margin: 8px 0; padding: 10px; background: rgba(51, 65, 85, 0.4); border-radius: 6px; border-left: 3px solid {'#10b981' if answer == correct_answer else '#475569'};">
+                    <span style="color: {'#34d399' if answer == correct_answer else '#94a3b8'};">
+                        {answer}
+                    </span>
+                </div>
+                """
+                for answer in answers
+            ])
+            content_html = f"""
+            <div class="answers-container" style="margin-top: 15px;">
+                <div style="color: #94a3b8; margin-bottom: 10px;">Answer options:</div>
+                {answers_html}
+            </div>
+            """
+        elif source == "wikipedia":
+            correct_answer = q.get('correct_answer', '')
+            if correct_answer:
+                content_html = f"""
+                <div class="answer" style="margin-top: 15px; padding: 15px; background: rgba(51, 65, 85, 0.4); border-radius: 6px; border-left: 3px solid #10b981;">
+                    <div style="color: #94a3b8; margin-bottom: 10px;">Answer:</div>
+                    <div style="color: #34d399;">{correct_answer}</div>
+                </div>
+                """
+        elif source == "reddit":
+            # Add subreddit to metadata if available
+            if 'subreddit' in q:
+                subreddit = q.get('subreddit', '')
+                metadata_html = metadata_html.replace(
+                    'posted on',
+                    f'posted in <span style="color: #60a5fa; font-weight: 500;">r/{subreddit}</span> on'
+                )
+        # If no specific content is set, try to use any available content fields
+        if not content_html:
+            if 'body' in q:
+                content_html = f"""
+                <div class="question-content" style="margin-top: 20px; font-family: 'Segoe UI', system-ui, -apple-system, sans-serif; color: #e2e8f0; line-height: 1.6;">
+                    {process_body(q['body'], title)}
+                </div>
+                """
+            elif 'correct_answer' in q:
+                content_html = f"""
+                <div class="answer" style="margin-top: 15px; padding: 15px; background: rgba(51, 65, 85, 0.4); border-radius: 6px;">
+                    <div style="color: #94a3b8; margin-bottom: 10px;">Answer:</div>
+                    <div style="color: #e2e8f0;">{q['correct_answer']}</div>
+                </div>
+                """
+        # Get source-specific icon and color
+        source_icon = {
+            'stack_exchange': '⚡', # Lightning bolt for Stack Exchange
+            'reddit': '🔸',  # Orange diamond for Reddit
+            'wikipedia': '📚',  # Books for Wikipedia
+            'trivia': '🎯',  # Target/bullseye for Trivia
+        }.get(source, '❔')  # Question mark as fallback
+        source_color = {
+            'stack_exchange': '#60a5fa',  # Blue
+            'reddit': '#f97316',  # Orange
+            'wikipedia': '#22c55e',  # Green
+            'trivia': '#eab308',  # Yellow
+        }.get(source, '#60a5fa')  # Default blue
+        # Create the source badge with icon
+        source_display = source.title() if source else "Unknown"
+        source_badge = f"""
+        <div class="source-badge" style="display: inline-flex; align-items: center; padding: 4px 8px; background: rgba(51, 65, 85, 0.5); border-radius: 4px; margin-right: 10px; border: 1px solid {source_color}25;">
+            <span style="margin-right: 6px; font-size: 1.1em;">{source_icon}</span>
+            <span style="color: {source_color}; font-size: 0.9em; font-weight: 500;">{source_display}</span>
+        </div>
+        """
+        # Handle matching interests display
+        matching_interests = q.get('matching_interests', [])
+        interest_types = q.get('interest_types', [])
+        interests_with_types = []
+        for interest, type_ in zip(matching_interests, interest_types):
+            if interest and type_:
+                interests_with_types.append({
+                    'name': interest,
+                    'type': type_
+                })
+        # Format interests by type
+        keywords = [i['name'] for i in interests_with_types if i['type'] == 'keyword']
+        topics = [i['name'] for i in interests_with_types if i['type'] == 'topic']
+        # Create interests display string
+        interests_display = []
+        if keywords:
+            interests_display.append(f"Keywords: {format_interest_list(set(keywords), max_items=3)}")
+        if topics:
+            interests_display.append(f"Topics: {format_interest_list(set(topics), max_items=3)}")
+        interests_str = " | ".join(interests_display) if interests_display else "No common interests found"
+        # Calculate relevance score display
+        relevance_score = q.get('relevance_score', 0)
+        score_display = f"""
+        <div class="relevance-score" style="display: inline-block; padding: 4px 8px; background: rgba(59, 130, 246, 0.2); border-radius: 4px; margin-left: 10px;">
+            <span style="color: #93c5fd; font-size: 0.9em;">Relevance: {relevance_score:.2f}</span>
+        </div>
+        """ if relevance_score > 0 else ""
+        # Create the question card HTML
+        question_html = f"""
+        <div class="question-card" style="background: rgba(51, 65, 85, 0.5); padding: 20px; border-radius: 8px; margin: 15px 0; border: 1px solid rgba(148, 163, 184, 0.2);">
+            <div class="question-header" style="display: flex; justify-content: space-between; align-items: flex-start; margin-bottom: 15px;">
+                <div style="flex: 1; display: flex; align-items: center;">
+                    {source_badge}
+                    <h3 style="color: #60a5fa; margin: 0; font-size: 1.4em; display: inline;">{title}</h3>
+                </div>
+                {score_display}
+            </div>
+            {metadata_html}
+            <div class="interests-bar" style="margin: 15px 0; padding: 10px; background: rgba(30, 41, 59, 0.4); border-radius: 6px; border-left: 3px solid #3b82f6;">
+                <div style="color: #94a3b8; font-size: 0.9em;">Common Interests:</div>
+                <div style="color: #93c5fd; font-weight: 500; margin-top: 5px;">{interests_str}</div>
+            </div>
+            {content_html}
+        </div>
+        """
+        return question_html
+    except Exception as e:
+        logger.error(f"Error formatting question: {str(e)}")
+        return f"""
+        <div style="background: rgba(239, 68, 68, 0.2); padding: 15px; border-radius: 8px; margin: 10px 0; border: 1px solid rgba(239, 68, 68, 0.3);">
+            <div style="color: #fca5a5;">Error displaying question: {str(e)}</div>
+        </div>
+        """
 def loading_message() -> Tuple[str, str, str]:
     """Return loading message in proper HTML format."""
     """
     return loading_html, loading_html, loading_html
+def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str, List[Dict]]:
     """Main function to get recommendations and user interests."""
+    # Extract actual usernames from the formatted strings
+    user1 = user1.split(" (")[0] if " (" in user1 else user1
+    user2 = user2.split(" (")[0] if " (" in user2 else user2
     recommender = QuestionRecommender()
     try:
         # Get interests for both users
         # Format interests summary
         interests_summary = f"""
+        <div class="interests-summary">
+            <div class="user-interests">
+                <h3>{user1}'s Interests</h3>
+                <div class="interest-section">
+                    <strong>Keywords:</strong> {format_interest_list(user1_interests['keywords'], max_items=8)}
+                </div>
+                <div class="interest-section">
+                    <strong>Topics:</strong> {format_interest_list(user1_interests['topics'], max_items=5)}
+                </div>
+            </div>
+            <div class="user-interests">
+                <h3>{user2}'s Interests</h3>
+                <div class="interest-section">
+                    <strong>Keywords:</strong> {format_interest_list(user2_interests['keywords'], max_items=8)}
+                </div>
+                <div class="interest-section">
+                    <strong>Topics:</strong> {format_interest_list(user2_interests['topics'], max_items=5)}
+                </div>
+            </div>
+            <div class="common-interests">
+                <h3>Common Interests</h3>
+                <div class="interest-section">
+                    <strong>Keywords:</strong> {format_interest_list(common_keywords, max_items=8)}
+                </div>
+                <div class="interest-section">
+                    <strong>Topics:</strong> {format_interest_list(common_topics, max_items=5)}
+                </div>
+            </div>
         </div>
+        """
+        # Get all recommended questions
+        questions = recommender.find_common_questions(user1, user2, max_questions=50)
         if questions:
+            questions_text = '<div class="questions-container">\n' + \
+                           '\n'.join(format_question(q) for q in questions) + \
+                           '\n</div>'
+            recommendation_type = '<h2 class="recommendation-header">Recommendations Based on Common Interests</h2>'
         else:
+            questions_text = '<div class="no-questions">No questions found based on common interests.</div>'
+            recommendation_type = '<h2 class="recommendation-header">No Recommendations Available</h2>'
+        return interests_summary, recommendation_type, questions_text, questions
     except Exception as e:
         logger.error(f"Error in recommend_questions: {str(e)}")
         return (
+            '<div class="error">Error fetching user interests. Please try again.</div>',
+            '<h2 class="error-header">Error</h2>',
+            f'<div class="error-message">An error occurred: {str(e)}</div>',
+            []
         )
     finally:
         recommender.close()
 }
 """
+def main():
+    # Create Gradio interface
+    recommender = QuestionRecommender()
+    users = recommender.get_all_users()
+    recommender.close()
+    with gr.Blocks(title="Question Recommender (Local Debug)", theme=gr.themes.Soft(), css=custom_css) as iface:
+        gr.Markdown("""
+        # 🤝 Question Recommender (Local Debug Version)
+        Find questions that two users might be interested in discussing together based on their common interests.
+        > This is the local debug version using the test database.
+        """)
+        with gr.Row(equal_height=True):
+            with gr.Column(scale=1):
+                user1_dropdown = gr.Dropdown(
+                    choices=users,
+                    label="👤 First User",
+                    interactive=True,
+                    elem_id="user1-input"
+                )
+            with gr.Column(scale=1):
+                user2_dropdown = gr.Dropdown(
+                    choices=users,
+                    label="👤 Second User",
+                    interactive=True,
+                    elem_id="user2-input"
+                )
+        recommend_btn = gr.Button(
+            "🔍 Get Recommendations",
+            variant="primary",
+            size="lg"
+        )
+        with gr.Row():
+            interests_output = gr.HTML(label="Common Interests")
+        recommendation_type = gr.HTML()
+        questions_output = gr.HTML()
+        def recommend_and_store(user1, user2):
+            """Get recommendations and store questions."""
+            interests, rec_type, questions_html, questions_data = recommend_questions(user1, user2)
+            return interests, rec_type, questions_html
+        # Wire up the components
+        recommend_btn.click(
+            fn=loading_message,
+            outputs=[interests_output, recommendation_type, questions_output],
+            queue=False
+        ).then(
+            fn=recommend_and_store,
+            inputs=[user1_dropdown, user2_dropdown],
+            outputs=[interests_output, recommendation_type, questions_output]
+        )
+    # Launch with additional debug info
+    logger.info("Starting local debug version of Question Recommender")
+    iface.launch(
+        show_error=True,
+        server_name="127.0.0.1",
+        server_port=7860
     )
+if __name__ == "__main__":
+    main()