Spaces:
Running
Running
| """ | |
| FomoFeed - User Profiler AI | |
| Creates semantic user profiles using Turkish BERT embeddings | |
| """ | |
| from fastapi import FastAPI, HTTPException | |
| from pydantic import BaseModel | |
| from transformers import AutoTokenizer, AutoModel | |
| import torch | |
| import numpy as np | |
| from datetime import datetime | |
| from collections import Counter | |
| import uvicorn | |
# ASGI application object; passed to uvicorn.run() when the file is executed
# directly.
app = FastAPI(title="FomoFeed User Profiler", version="1.0.0")

# Checkpoint identifier handed to AutoTokenizer / AutoModel.from_pretrained.
MODEL_NAME = "dbmdz/bert-base-turkish-cased"

# Both handles stay None until load_model() succeeds. Code that needs them
# checks for None first (see get_bert_embedding, root, health).
tokenizer = model = None
def load_model():
    """
    Populate the module-level ``tokenizer`` and ``model`` from MODEL_NAME.

    Loading is best-effort: any exception is printed and swallowed, so the
    caller never sees a failure. If the tokenizer loads but the model does
    not, ``tokenizer`` is left set while ``model`` keeps its previous value.
    """
    global tokenizer, model

    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModel.from_pretrained(MODEL_NAME)
        # Inference only: put the network in evaluation mode.
        model.eval()
        print(f"✅ Model loaded: {MODEL_NAME}")
    except Exception as exc:
        print(f"⚠️ Model load failed: {exc}")
# Runs once when this module is imported. load_model() prints and swallows
# any failure, so import continues even when the model cannot be loaded.
load_model()
class UserActivity(BaseModel):
    # Input payload describing one user's recorded activity. Only user_id is
    # required; every list field defaults to empty.
    user_id: int

    # Caption texts; their combined count feeds the content_creator score and
    # their text feeds the user embedding.
    post_captions: list[str] = []
    moment_captions: list[str] = []

    # Tags grouped by the kind of interaction that produced them.
    liked_tags: list[str] = []
    saved_tags: list[str] = []
    commented_tags: list[str] = []

    # One hour value per engagement event; analyze_activity_pattern buckets
    # these as hours of the day (0-23).
    engagement_hours: list[int] = []

    # One label per engagement event: view, like, comment, save
    engagement_types: list[str] = []
class UserProfileResponse(BaseModel):
    # Profile object built by create_profile(); every field is required.
    user_id: int

    # Top-ranked tags from analyze_interests().
    interests: list[str]

    # Result dicts of analyze_content_preference(), analyze_activity_pattern()
    # and analyze_engagement_style() respectively.
    content_preference: dict
    activity_pattern: dict
    engagement_style: dict

    # Copy of activity_pattern["peak_hours"].
    optimal_hours: list[int]

    # Data-volume score from calculate_confidence().
    confidence: float
def get_bert_embedding(text: str) -> np.ndarray:
    """
    Encode ``text`` with the loaded model and return its first-token vector.

    Returns a 768-element zero vector when ``text`` is empty, when the model
    or tokenizer is not loaded, or when encoding raises (the error is printed
    and swallowed).
    """
    if not text or model is None or tokenizer is None:
        return np.zeros(768)

    try:
        encoded = tokenizer(
            text,
            return_tensors="pt",
            max_length=512,
            truncation=True,
            padding=True,
        )
        with torch.no_grad():
            result = model(**encoded)
        # Sequence position 0 is the CLS token; the trailing [0] drops the
        # batch axis of the single-text batch.
        return result.last_hidden_state[:, 0, :].numpy()[0]
    except Exception as exc:
        print(f"Embedding error: {exc}")
        return np.zeros(768)
def analyze_interests(activity: UserActivity) -> list[str]:
    """
    Rank the user's tags by weighted frequency and return the top ten.

    Each occurrence of a tag scores 2 when liked, 3 when commented and 5 when
    saved. Tags with equal scores keep first-seen order, scanning liked, then
    saved, then commented tags.
    """
    # Source order is significant: it fixes the tie-break order described
    # above.
    weighted_sources = (
        (activity.liked_tags, 2),
        (activity.saved_tags, 5),
        (activity.commented_tags, 3),
    )

    scores = Counter()
    for tags, weight in weighted_sources:
        for tag in tags:
            scores[tag] += weight

    if not scores:
        return []

    return [tag for tag, _ in scores.most_common(10)]
def analyze_content_preference(activity: UserActivity) -> dict:
    """
    Analyze user's content consumption preferences.

    Args:
        activity: Object exposing ``engagement_types``, ``post_captions`` and
            ``moment_captions`` lists.

    Returns:
        A dict that always has the same four keys, each rounded to 2 places:
        ``passive_consumer`` (share of "view" events), ``active_engager``
        (share of "like" + "save" events), ``content_creator`` (caption count
        / 10, capped at 1.0) and ``engagement_depth`` (share of "comment" +
        "save" events). With no engagement events the two consumer scores
        default to 0.5 and ``engagement_depth`` to 0.0.
    """
    # The creator score depends only on captions, so compute it before the
    # no-engagement early return; a user who posts but has no recorded
    # engagement must not be reported as a non-creator.
    caption_count = len(activity.post_captions) + len(activity.moment_captions)
    creator_score = round(min(caption_count / 10, 1.0), 2)

    total_engagement = len(activity.engagement_types)
    if total_engagement == 0:
        return {
            "passive_consumer": 0.5,
            "active_engager": 0.5,
            "content_creator": creator_score,
            "engagement_depth": 0.0,
        }

    # Counter yields 0 for labels that never occurred, so no guards are
    # needed. Labels other than the four below still count toward the total.
    counts = Counter(activity.engagement_types)
    views = counts["view"]
    likes = counts["like"]
    comments = counts["comment"]
    saves = counts["save"]

    return {
        "passive_consumer": round(views / total_engagement, 2),
        "active_engager": round((likes + saves) / total_engagement, 2),
        "content_creator": creator_score,
        "engagement_depth": round((comments + saves) / total_engagement, 2),
    }
def analyze_activity_pattern(activity: UserActivity) -> dict:
    """
    Analyze when user is most active.

    Hours outside 0-23 are ignored. Previously they were counted in the total
    and could appear in ``peak_hours`` without falling in any bucket, so the
    distribution did not sum to 1.

    Args:
        activity: Object exposing an ``engagement_hours`` list of ints.

    Returns:
        A dict with ``peak_hours`` (up to three most frequent hours, most
        frequent first), ``activity_distribution`` (share of events per
        morning/afternoon/evening/night bucket, rounded to 2 places) and
        ``timezone_pattern`` (name of the busiest bucket). When there are no
        usable hours, fixed defaults are returned and
        ``activity_distribution`` is the string "unknown".
    """
    valid_hours = [hour for hour in activity.engagement_hours if 0 <= hour < 24]
    if not valid_hours:
        return {
            "peak_hours": [19, 20, 21],
            "activity_distribution": "unknown",
            "timezone_pattern": "evening",
        }

    hour_counts = Counter(valid_hours)
    peak_hours = [hour for hour, _ in hour_counts.most_common(3)]

    # Bucket order matters: it is the tie-break order for the primary pattern.
    buckets = {
        "morning": range(6, 12),
        "afternoon": range(12, 18),
        "evening": range(18, 24),
        "night": range(0, 6),
    }
    bucket_counts = {
        name: sum(hour_counts[hour] for hour in hours)
        for name, hours in buckets.items()
    }

    total = len(valid_hours)
    distribution = {
        name: round(count / total, 2) for name, count in bucket_counts.items()
    }

    # Pick the winner from raw counts; rounded shares can tie even when the
    # underlying counts differ.
    primary = max(bucket_counts, key=bucket_counts.get)

    return {
        "peak_hours": peak_hours,
        "activity_distribution": distribution,
        "timezone_pattern": primary,
    }
def analyze_engagement_style(activity: UserActivity) -> dict:
    """
    Classify user's engagement style.

    Args:
        activity: Object exposing an ``engagement_types`` list of labels.

    Returns:
        A dict with ``style`` ("new_user" when there are no events, otherwise
        "lurker" / "casual" / "active" / "power_user" for interaction rates
        below 0.1 / 0.3 / 0.6 / anything higher), ``interaction_rate`` (share
        of like + comment + save events, rounded to 2 places),
        ``content_saver`` (saves exceed 5% of events), ``commenter``
        (comments exceed 2% of events) and ``engagement_breakdown`` (raw
        counts). All five keys are present on every return, including the
        no-data case.
    """
    counts = Counter(activity.engagement_types)
    views = counts["view"]
    likes = counts["like"]
    comments = counts["comment"]
    saves = counts["save"]
    breakdown = {
        "views": views,
        "likes": likes,
        "comments": comments,
        "saves": saves,
    }

    # Labels other than the four above still count toward the total.
    total = sum(counts.values())
    if total == 0:
        return {
            "style": "new_user",
            "interaction_rate": 0.0,
            "content_saver": False,
            "commenter": False,
            "engagement_breakdown": breakdown,
        }

    interaction_rate = (likes + comments + saves) / total

    # First threshold the rate falls under wins; anything at or above the
    # last threshold is a power user.
    style = "power_user"
    for upper_bound, label in ((0.1, "lurker"), (0.3, "casual"), (0.6, "active")):
        if interaction_rate < upper_bound:
            style = label
            break

    return {
        "style": style,
        "interaction_rate": round(interaction_rate, 2),
        "content_saver": saves > (total * 0.05),
        "commenter": comments > (total * 0.02),
        "engagement_breakdown": breakdown,
    }
def calculate_confidence(activity: UserActivity) -> float:
    """
    Score how much data backs the profile, from 0.0 to 1.0.

    Three volumes are each scaled to [0, 1] and combined with fixed weights:
    captions (full at 20, weight 0.3), tags (full at 50, weight 0.4) and
    engagement events (full at 100, weight 0.3). The sum is rounded to two
    places.
    """
    def saturating(count, full_at):
        # Linear ramp that tops out at 1.0 once ``count`` reaches ``full_at``.
        return min(count / full_at, 1.0)

    caption_count = len(activity.post_captions) + len(activity.moment_captions)
    tag_count = (
        len(activity.liked_tags)
        + len(activity.saved_tags)
        + len(activity.commented_tags)
    )
    engagement_count = len(activity.engagement_types)

    weighted_sum = (
        saturating(caption_count, 20) * 0.3
        + saturating(tag_count, 50) * 0.4
        + saturating(engagement_count, 100) * 0.3
    )
    return round(weighted_sum, 2)
def root():
    """
    Describe the service and which backend is currently available.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view of the file; confirm how it is registered on `app`.
    backend = "turkish-bert" if model else "rule-based"
    return {
        "service": "FomoFeed User Profiler",
        "status": "active",
        "model": backend,
        "version": "1.0.0",
    }
def health():
    """
    Report liveness, whether the model is loaded, and the current local time.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view of the file; confirm how it is registered on `app`.
    model_loaded = model is not None
    # datetime.now() is naive, so the timestamp carries no UTC offset.
    timestamp = datetime.now().isoformat()
    return {
        "status": "healthy",
        "model_loaded": model_loaded,
        "timestamp": timestamp,
    }
def create_profile(activity: UserActivity):
    """
    Build a full UserProfileResponse for one user's activity.

    Runs every analyzer on ``activity`` and packs the results into the
    response model. Any exception is converted into an HTTPException with
    status 500 whose detail is the exception text.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view of the file; confirm how it is registered on `app`.
    try:
        fields = {
            "user_id": activity.user_id,
            "interests": analyze_interests(activity),
            "content_preference": analyze_content_preference(activity),
            "activity_pattern": analyze_activity_pattern(activity),
            "engagement_style": analyze_engagement_style(activity),
            "confidence": calculate_confidence(activity),
        }
        # The optimal hours are the peak hours found by the activity analysis.
        fields["optimal_hours"] = fields["activity_pattern"]["peak_hours"]
        return UserProfileResponse(**fields)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
def get_user_embedding(activity: UserActivity):
    """
    Produce one embedding vector for a user from their captions and tags.

    Post captions, moment captions, liked tags and saved tags are joined with
    spaces (commented tags are not included). Only the first 1000 characters
    are embedded. When the joined text is blank, a 768-element zero vector is
    returned with an explanatory note and no "dimension" key. Any exception
    is converted into an HTTPException with status 500.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view of the file; confirm how it is registered on `app`.
    try:
        text_sources = (
            activity.post_captions,
            activity.moment_captions,
            activity.liked_tags,
            activity.saved_tags,
        )
        combined = " ".join(piece for source in text_sources for piece in source)

        if combined.strip():
            vector = get_bert_embedding(combined[:1000])
            return {
                "user_id": activity.user_id,
                "embedding": vector.tolist(),
                "dimension": 768,
            }

        return {
            "user_id": activity.user_id,
            "embedding": [0.0] * 768,
            "note": "No text data available",
        }
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
def batch_profile(activities: list[UserActivity]):
    """
    Build profiles for several users in one call.

    Returns ``{"profiles": [...]}`` with one plain dict per input, in input
    order, holding the same seven fields create_profile() produces. Any
    exception from any user aborts the whole batch with an HTTPException of
    status 500.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view of the file; confirm how it is registered on `app`.
    def build_entry(activity):
        # Analyzer calls and key order follow the single-profile layout.
        entry = {
            "user_id": activity.user_id,
            "interests": analyze_interests(activity),
            "content_preference": analyze_content_preference(activity),
            "activity_pattern": analyze_activity_pattern(activity),
            "engagement_style": analyze_engagement_style(activity),
        }
        entry["optimal_hours"] = entry["activity_pattern"]["peak_hours"]
        entry["confidence"] = calculate_confidence(activity)
        return entry

    try:
        return {"profiles": [build_entry(activity) for activity in activities]}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
if __name__ == "__main__":
    # Direct execution only: serve the app on all interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)