""" FomoFeed - User Profiler AI Creates semantic user profiles using Turkish BERT embeddings """ from fastapi import FastAPI, HTTPException from pydantic import BaseModel from transformers import AutoTokenizer, AutoModel import torch import numpy as np from datetime import datetime from collections import Counter import uvicorn app = FastAPI(title="FomoFeed User Profiler", version="1.0.0") # Load Turkish BERT model MODEL_NAME = "dbmdz/bert-base-turkish-cased" tokenizer = None model = None def load_model(): global tokenizer, model try: tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) model = AutoModel.from_pretrained(MODEL_NAME) model.eval() print(f"✅ Model loaded: {MODEL_NAME}") except Exception as e: print(f"⚠️ Model load failed: {e}") # Load model at startup load_model() class UserActivity(BaseModel): user_id: int post_captions: list[str] = [] moment_captions: list[str] = [] liked_tags: list[str] = [] saved_tags: list[str] = [] commented_tags: list[str] = [] engagement_hours: list[int] = [] engagement_types: list[str] = [] # view, like, comment, save class UserProfileResponse(BaseModel): user_id: int interests: list[str] content_preference: dict activity_pattern: dict engagement_style: dict optimal_hours: list[int] confidence: float def get_bert_embedding(text: str) -> np.ndarray: """ Get BERT embedding for text """ if not text or model is None or tokenizer is None: return np.zeros(768) try: inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True, padding=True) with torch.no_grad(): outputs = model(**inputs) # Use CLS token embedding embedding = outputs.last_hidden_state[:, 0, :].numpy()[0] return embedding except Exception as e: print(f"Embedding error: {e}") return np.zeros(768) def analyze_interests(activity: UserActivity) -> list[str]: """ Extract user interests from their activity """ all_tags = ( activity.liked_tags * 2 + # Weight likes lower activity.saved_tags * 5 + # Weight saves higher activity.commented_tags * 3 # Weight comments medium ) if not all_tags: return [] # Count frequency tag_counts = Counter(all_tags) # Get top 10 tags top_tags = [tag for tag, count in tag_counts.most_common(10)] return top_tags def analyze_content_preference(activity: UserActivity) -> dict: """ Analyze user's content consumption preferences """ total_engagement = len(activity.engagement_types) if total_engagement == 0: return { "passive_consumer": 0.5, "active_engager": 0.5, "content_creator": 0.0 } # Count engagement types engagement_counts = Counter(activity.engagement_types) views = engagement_counts.get("view", 0) likes = engagement_counts.get("like", 0) comments = engagement_counts.get("comment", 0) saves = engagement_counts.get("save", 0) # Calculate scores passive_score = views / total_engagement if views > 0 else 0 active_score = (likes + saves) / total_engagement if (likes + saves) > 0 else 0 creator_score = len(activity.post_captions + activity.moment_captions) / 10 # Normalize creator_score = min(creator_score, 1.0) return { "passive_consumer": round(passive_score, 2), "active_engager": round(active_score, 2), "content_creator": round(creator_score, 2), "engagement_depth": round((comments + saves) / max(total_engagement, 1), 2) } def analyze_activity_pattern(activity: UserActivity) -> dict: """ Analyze when user is most active """ if not activity.engagement_hours: return { "peak_hours": [19, 20, 21], "activity_distribution": "unknown", "timezone_pattern": "evening" } hour_counts = Counter(activity.engagement_hours) # Get top 5 hours peak_hours = [hour for hour, count in hour_counts.most_common(5)] # Determine pattern morning = sum(1 for h in activity.engagement_hours if 6 <= h < 12) afternoon = sum(1 for h in activity.engagement_hours if 12 <= h < 18) evening = sum(1 for h in activity.engagement_hours if 18 <= h < 24) night = sum(1 for h in activity.engagement_hours if 0 <= h < 6) total = len(activity.engagement_hours) if total > 0: distribution = { "morning": round(morning / total, 2), "afternoon": round(afternoon / total, 2), "evening": round(evening / total, 2), "night": round(night / total, 2) } # Primary pattern primary = max(distribution.items(), key=lambda x: x[1])[0] else: distribution = {"morning": 0.25, "afternoon": 0.25, "evening": 0.25, "night": 0.25} primary = "evening" return { "peak_hours": peak_hours[:3], "activity_distribution": distribution, "timezone_pattern": primary } def analyze_engagement_style(activity: UserActivity) -> dict: """ Classify user's engagement style """ engagement_counts = Counter(activity.engagement_types) total = sum(engagement_counts.values()) if total == 0: return { "style": "new_user", "interaction_rate": 0.0, "content_saver": False, "commenter": False } # Calculate metrics views = engagement_counts.get("view", 0) likes = engagement_counts.get("like", 0) comments = engagement_counts.get("comment", 0) saves = engagement_counts.get("save", 0) interaction_rate = (likes + comments + saves) / total # Classify style if interaction_rate < 0.1: style = "lurker" elif interaction_rate < 0.3: style = "casual" elif interaction_rate < 0.6: style = "active" else: style = "power_user" return { "style": style, "interaction_rate": round(interaction_rate, 2), "content_saver": saves > (total * 0.05), "commenter": comments > (total * 0.02), "engagement_breakdown": { "views": views, "likes": likes, "comments": comments, "saves": saves } } def calculate_confidence(activity: UserActivity) -> float: """ Calculate confidence score based on data volume """ # Count data points total_captions = len(activity.post_captions) + len(activity.moment_captions) total_tags = len(activity.liked_tags) + len(activity.saved_tags) + len(activity.commented_tags) total_engagements = len(activity.engagement_types) # Score each dimension caption_score = min(total_captions / 20, 1.0) * 0.3 tag_score = min(total_tags / 50, 1.0) * 0.4 engagement_score = min(total_engagements / 100, 1.0) * 0.3 confidence = caption_score + tag_score + engagement_score return round(confidence, 2) @app.get("/") def root(): return { "service": "FomoFeed User Profiler", "status": "active", "model": "turkish-bert" if model else "rule-based", "version": "1.0.0" } @app.get("/health") def health(): return { "status": "healthy", "model_loaded": model is not None, "timestamp": datetime.now().isoformat() } @app.post("/profile", response_model=UserProfileResponse) def create_profile(activity: UserActivity): """ Create comprehensive user profile from activity data """ try: # Analyze different aspects interests = analyze_interests(activity) content_pref = analyze_content_preference(activity) activity_pattern = analyze_activity_pattern(activity) engagement_style = analyze_engagement_style(activity) confidence = calculate_confidence(activity) # Extract optimal hours optimal_hours = activity_pattern["peak_hours"] return UserProfileResponse( user_id=activity.user_id, interests=interests, content_preference=content_pref, activity_pattern=activity_pattern, engagement_style=engagement_style, optimal_hours=optimal_hours, confidence=confidence ) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @app.post("/embedding") def get_user_embedding(activity: UserActivity): """ Generate BERT embedding for user based on their content """ try: # Combine all text all_text = " ".join( activity.post_captions + activity.moment_captions + activity.liked_tags + activity.saved_tags ) if not all_text.strip(): return { "user_id": activity.user_id, "embedding": [0.0] * 768, "note": "No text data available" } # Get embedding embedding = get_bert_embedding(all_text[:1000]) # Limit to 1000 chars return { "user_id": activity.user_id, "embedding": embedding.tolist(), "dimension": 768 } except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @app.post("/batch_profile") def batch_profile(activities: list[UserActivity]): """ Create profiles for multiple users """ try: profiles = [] for activity in activities: interests = analyze_interests(activity) content_pref = analyze_content_preference(activity) activity_pattern = analyze_activity_pattern(activity) engagement_style = analyze_engagement_style(activity) confidence = calculate_confidence(activity) profiles.append({ "user_id": activity.user_id, "interests": interests, "content_preference": content_pref, "activity_pattern": activity_pattern, "engagement_style": engagement_style, "optimal_hours": activity_pattern["peak_hours"], "confidence": confidence }) return {"profiles": profiles} except Exception as e: raise HTTPException(status_code=500, detail=str(e)) if __name__ == "__main__": uvicorn.run(app, host="0.0.0.0", port=7860)