Spaces:

Janick1
/

matches

Sleeping

App Files Files Community

Joseph Ibochi committed on Jul 14, 2025

Commit

7958e55

1 Parent(s): 43814b4

initial commit

Browse files

Files changed (6) hide show

.gitignore +1 -0
Dockerfile +14 -0
app/__init__.py +0 -0
app/app.py +19 -0
app/model.py +83 -0
requirements.txt +7 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .env

Dockerfile ADDED Viewed

	@@ -0,0 +1,14 @@

+# Base image. "python:3.16" is not a published tag, so the image could not be
+# pulled; 3.9 is the version used by the Hugging Face Docker Spaces template
+# that the rest of this file follows.
+FROM python:3.9
+# Create and switch to an unprivileged user with UID 1000 so the app does not
+# run as root.
+RUN useradd -m -u 1000 user
+USER user
+# pip installs console scripts (e.g. uvicorn) into the user's local bin dir.
+ENV PATH="/home/user/.local/bin:$PATH"
+WORKDIR /app
+# Copy and install requirements before the rest of the source so the
+# dependency layer is reused when only application code changes.
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+COPY --chown=user . /app
+# Serve the FastAPI object "app" from app/app.py on port 7860.
+CMD ["uvicorn", "app.app:app", "--host", "0.0.0.0", "--port", "7860"]

app/__init__.py ADDED Viewed

File without changes

app/app.py ADDED Viewed

	@@ -0,0 +1,19 @@

+from fastapi import FastAPI
+from pydantic import BaseModel
+from typing import Dict, List
+from app.model import RoommateMatcher
+# ASGI application object; the Dockerfile's uvicorn command loads it as "app.app:app".
+app = FastAPI()
+# One matcher is created at import time and shared by every request, so the
+# SentenceTransformer model loaded in RoommateMatcher.__init__ is loaded once.
+matcher = RoommateMatcher()
+class MatchRequest(BaseModel):
+    """Request body for ``POST /match``."""
+    # Profile to find matches for. RoommateMatcher.predict reads the keys
+    # 'financials', 'location', 'budget_min' and 'budget_max' directly, and
+    # 'personal_description', 'occupation' and 'social_preference' via .get().
+    current_user: Dict
+    # Candidate profiles to rank against current_user; predict reads the same
+    # keys from each of them.
+    other_users: List[Dict]
+@app.post("/match")
+def match(request: MatchRequest):
+    try:
+        result = matcher.predict(request.current_user, request.other_users)
+        return {"matches": result}
+    except Exception as e:
+        return {"error": str(e)}

app/model.py ADDED Viewed

	@@ -0,0 +1,83 @@

+# model.py
+import numpy as np
+import pandas as pd
+from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
+from sklearn.metrics.pairwise import cosine_similarity
+from sentence_transformers import SentenceTransformer
+from typing import Dict, List
+class RoommateMatcher:
+    def __init__(self):
+        self.text_model = SentenceTransformer('all-MiniLM-L6-v2')
+        self.financial_encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
+        self.scaler = MinMaxScaler()
+        self.is_fitted = False
+    def predict(self, current_user: Dict, other_users: List[Dict]) -> List[Dict]:
+        if not self.is_fitted and other_users:
+            self._fit_encoders(other_users)
+        others_df = pd.DataFrame(other_users)
+        others_df['combined_text'] = others_df.apply(
+            lambda x: " ".join(filter(None, [
+                str(x.get('personal_description', '')),
+                str(x.get('occupation', '')),
+                *[str(s) for s in x.get('social_preference', [])]
+            ])), axis=1
+        )
+        text_embeds = self.text_model.encode(others_df['combined_text'].tolist())
+        text_block = text_embeds / np.linalg.norm(text_embeds, axis=1, keepdims=True)
+        fin_block = self.financial_encoder.transform(others_df[['financials']])
+        fin_block = fin_block / np.linalg.norm(fin_block, axis=1, keepdims=True)
+        num_features = np.hstack([
+            np.array([x for x in others_df['location']]),
+            others_df[['budget_min', 'budget_max']].values
+        ])
+        num_block = self.scaler.transform(num_features)
+        num_block = num_block / np.linalg.norm(num_block, axis=1, keepdims=True)
+        current_text = self.text_model.encode(" ".join(filter(None, [
+            str(current_user.get('personal_description', '')),
+            str(current_user.get('occupation', '')),
+            *[str(s) for s in current_user.get('social_preference', [])]
+        ])))
+        current_text = current_text / np.linalg.norm(current_text)
+        current_fin = self.financial_encoder.transform([[current_user['financials']]])
+        current_fin = current_fin / np.linalg.norm(current_fin)
+        current_num = self.scaler.transform([[
+            current_user['location'][0],
+            current_user['location'][1],
+            current_user['budget_min'],
+            current_user['budget_max']
+        ]])
+        current_num = current_num / np.linalg.norm(current_num)
+        combined_existing = np.hstack([
+            text_block * 0.6,
+            fin_block * 0.1,
+            num_block * 0.3
+        ])
+        current_block = np.hstack([
+            current_text.reshape(1, -1) * 0.6,
+            current_fin * 0.2,
+            current_num * 0.2
+        ])
+        others_df['similarity'] = np.round(
+            cosine_similarity(current_block, combined_existing)[0] * 100, 2
+        )
+        return others_df.sort_values('similarity', ascending=False).head(10).to_dict('records')
+    def _fit_encoders(self, users: List[Dict]):
+        financials = np.array([u['financials'] for u in users]).reshape(-1, 1)
+        locations = np.array([u['location'] for u in users])
+        budgets = np.array([[u['budget_min'], u['budget_max']] for u in users])
+        self.financial_encoder.fit(financials)
+        self.scaler.fit(np.hstack([locations, budgets]))
+        self.is_fitted = True

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+fastapi
+uvicorn
+pydantic
+numpy
+pandas
+sentence-transformers>=2.2.0
+# app/model.py calls OneHotEncoder(sparse_output=False); the sparse_output
+# parameter was added in scikit-learn 1.2, so older releases raise TypeError.
+scikit-learn>=1.2.0