Spaces:

bardd
/

Rec_pt

Sleeping

App Files Files Community

bardd committed on Sep 20, 2024

Commit

b7bf6bb

verified ·

1 Parent(s): f108433

Upload 6 files

Browse files

Files changed (6) hide show

Dockerfile +38 -0
all_columns.joblib +3 -0
app.py +6 -0
main.py +188 -0
requirements.txt +7 -0
svd_model.joblib +3 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,38 @@

+FROM python:3.11-slim
+# Set environment variables: keep apt non-interactive and pin the timezone
+ENV DEBIAN_FRONTEND=noninteractive
+ENV TZ=UTC
+# Install system dependencies.
+# --no-install-recommends keeps optional "recommended" packages out of the
+# image; the apt lists are removed in the same layer so they are never stored.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    libsqlite3-dev \
+    && rm -rf /var/lib/apt/lists/*
+# Set working directory
+WORKDIR /app
+# Install Python dependencies (pinned versions; installed before the source
+# is copied so this layer is reused when only application code changes)
+RUN pip install --no-cache-dir --upgrade pip setuptools && \
+    pip install --no-cache-dir \
+    fastapi==0.113.0 \
+    pymongo==4.9.1 \
+    pandas==2.2.3 \
+    numpy==1.26.4 \
+    scikit-learn==1.5.2 \
+    joblib==1.4.2 \
+    uvicorn==0.30.6
+# Copy your application files
+COPY . .
+# Create logs directory (main.py writes its log files under ./logs)
+RUN mkdir -p /app/logs
+# Expose the port that app.py binds uvicorn to
+EXPOSE 7860
+# Command to run your application
+CMD ["python", "app.py"]

all_columns.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17ca0b7152fe6f2a584024284b878545275848e7811f732ac20b29f13de44202
+size 31936

app.py ADDED Viewed

	@@ -0,0 +1,6 @@

+import uvicorn
+from main import app
+# Script entry point: serve the FastAPI app imported from main.py.
+# Binds to all interfaces on port 7860.
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=7860)

main.py ADDED Viewed

	@@ -0,0 +1,188 @@

+from fastapi import FastAPI, BackgroundTasks
+from contextlib import asynccontextmanager
+from pymongo import MongoClient
+import pandas as pd
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
+import joblib
+import asyncio
+import logging
+from logging.handlers import RotatingFileHandler
+import os
+from datetime import datetime
+# Set up logging
+log_directory = "logs"
+if not os.path.exists(log_directory):
+    os.makedirs(log_directory)
+log_file = os.path.join(log_directory, f"app_{datetime.now().strftime('%Y%m%d')}.log")
+# Configure logging to write to both file and console
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+                    handlers=[
+                        RotatingFileHandler(log_file, maxBytes=10000000, backupCount=5),
+                        logging.StreamHandler()
+                    ])
+logger = logging.getLogger(__name__)
+# MongoDB connection setup
+db_name = 'property-listing'
+collection_name = 'synthetic_user_behavior_owais'
+connection_string = os.getenv('CONNECTION_STRING')
+client = MongoClient(connection_string)
+db = client[db_name]
+collection = db[collection_name]
+# Load pre-trained SVD model and user-item matrix columns
+# Both artifacts are read once, at import time, from paths relative to the
+# current working directory.
+svd = joblib.load('svd_model.joblib')
+# Labels of the user-item matrix columns. Later code indexes this object with
+# an array of positions and calls .tolist() on the result, so it is presumably
+# a pandas Index or numpy array -- TODO confirm against the training code.
+user_item_matrix_columns = joblib.load('all_columns.joblib')
+# Transposed model components; used below as the item side of the
+# cosine-similarity comparison.
+item_factors = svd.components_.T
+# Define the actions we're interested in
+# 'time_spent' is handled as a summed-duration column; the other names are
+# handled as per-action occurrence counts.
+ALL_COLUMNS = ['nxt_img_listing', 'read_more_listing', 'nxt_img_detail', 'read_more_detail', 'time_spent']
+# Global variables to store the latest session and recommendations
+# Written by the background polling task and read by the /recommendations route.
+latest_session_id = None
+latest_recommendations = None
+async def check_for_new_session():
+    global latest_session_id, latest_recommendations
+    last_document_count = 0
+    while True:
+        try:
+            # Find the most recent document in the collection
+            latest_doc = collection.find_one(sort=[('timestamp', -1)])
+            current_document_count = collection.count_documents({})
+            if latest_doc:
+                if latest_doc['sessionId'] != latest_session_id or current_document_count > last_document_count:
+                    latest_session_id = latest_doc['sessionId']
+                    logger.info(f"New activity detected for session: {latest_session_id}")
+                    latest_recommendations = generate_recommendations_for_session(latest_session_id)
+                    if latest_recommendations:
+                        logger.info(f"Generated recommendations for session {latest_session_id}: {latest_recommendations}")
+                    else:
+                        logger.warning(f"No recommendations generated for session {latest_session_id}")
+                    last_document_count = current_document_count
+                else:
+                    logger.info("No new activity detected")
+            else:
+                logger.warning("No documents found in the collection")
+            await asyncio.sleep(5)  # Check every 5 seconds
+        except Exception as e:
+            logger.error(f"Error in check_for_new_session: {e}")
+            await asyncio.sleep(5)  # Wait before retrying
+def generate_recommendations_for_session(session_id):
+    try:
+        # Retrieve all documents for the given session
+        session_data = list(collection.find({'sessionId': session_id}))
+        if not session_data:
+            logger.warning(f"No data found for session {session_id}")
+            return None
+        # Convert session data to a DataFrame
+        raw_df = pd.DataFrame(session_data)
+        # Aggregate data by id and action
+        aggregated_data = raw_df.groupby(['id', 'action']).agg(
+            presence=('action', 'size'),
+            total_duration=('duration', 'sum')
+        ).reset_index()
+        # Create a pivot table from the aggregated data
+        pivot_df = aggregated_data.pivot_table(
+            index=['id'],
+            columns='action',
+            values=['presence', 'total_duration'],
+            fill_value=0
+        )
+        # Flatten column names
+        pivot_df.columns = ['_'.join(col).strip() for col in pivot_df.columns.values]
+        # Ensure all expected columns exist in the pivot table
+        for col in ALL_COLUMNS:
+            if f'presence_{col}' not in pivot_df.columns and col != 'time_spent':
+                pivot_df[f'presence_{col}'] = 0
+            elif col == 'time_spent' and 'total_duration_time_spent' not in pivot_df.columns:
+                pivot_df['total_duration_time_spent'] = 0
+        # Calculate interaction score for each row
+        pivot_df['interaction_score'] = pivot_df.apply(calculate_interaction_score, axis=1)
+        # Create a user vector based on the interaction scores
+        user_vector = pd.Series(index=user_item_matrix_columns, dtype=float).fillna(0)
+        for property_id, score in pivot_df['interaction_score'].items():
+            if property_id in user_vector.index:
+                user_vector[property_id] = score
+        # Transform the user vector using the SVD model
+        user_vector_array = user_vector.values.reshape(1, -1)
+        user_latent = svd.transform(user_vector_array)
+        # Calculate similarity scores between the user vector and item factors
+        similarity_scores = cosine_similarity(user_latent, item_factors)
+        # Get the indices of the top 10 most similar items
+        top_indices = similarity_scores.argsort()[0][-10:][::-1]
+        # Get the corresponding property IDs for the top indices
+        recommendations = user_item_matrix_columns[top_indices].tolist()
+        return recommendations
+    except Exception as e:
+        logger.error(f"Error in generate_recommendations_for_session: {e}")
+        return None
+def calculate_interaction_score(row):
+    try:
+        # Calculate the score based on the presence of different actions
+        score = (
+            row.get('presence_nxt_img_listing', 0) * 1 +
+            row.get('presence_read_more_listing', 0) * 2 +
+            row.get('presence_nxt_img_detail', 0) * 3 +
+            row.get('presence_read_more_detail', 0) * 4 +
+            row.get('total_duration_time_spent', 0) / 10
+        )
+        # Apply bounce penalty if the session duration is less than 15 seconds
+        if 'total_duration_time_spent' in row and row['total_duration_time_spent'] < 15:
+            score -= 10
+        return score
+    except Exception as e:
+        logger.error(f"Error in calculate_interaction_score: {e}")
+        return 0
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Startup: create background task
+    task = asyncio.create_task(check_for_new_session())
+    yield
+    # Shutdown: cancel background task
+    task.cancel()
+    try:
+        await task
+    except asyncio.CancelledError:
+        logger.info("Background task cancelled")
+# Create FastAPI application instance
+app = FastAPI(lifespan=lifespan)
+@app.get("/")
+async def root():
+    return {"message": "Welcome to the Rec API"}
+@app.get("/recommendations")
+async def get_recommendations():
+    if latest_recommendations:
+        logger.info(f"Generated recommendations: {{'recommendations': {latest_recommendations}, 'session_id': '{latest_session_id}'}}")
+        return {"recommendations": latest_recommendations, "session_id": latest_session_id}
+    else:
+        return {"message": "No recommendations available yet", "session_id": latest_session_id}

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+fastapi==0.113.0
+pymongo==4.9.1
+pandas==2.2.3
+numpy==1.26.4
+scikit-learn==1.5.2
+joblib==1.4.2
+uvicorn==0.30.6

svd_model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d806bc456b2d81eb497dc520666790484843d525e55cbfb3add02084bf0d97cf
+size 143063