Joseph Ibochi committed on
Commit
7958e55
·
1 Parent(s): 43814b4

initial commit

Browse files
Files changed (6) hide show
  1. .gitignore +1 -0
  2. Dockerfile +14 -0
  3. app/__init__.py +0 -0
  4. app/app.py +19 -0
  5. app/model.py +83 -0
  6. requirements.txt +7 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the roommate-matching API.
#
# The base tag must name a published python image; "python:3.16" does not
# exist, so the build failed at the very first step.
# NOTE(review): 3.11 is chosen as a currently supported release; confirm the
# version the project actually targets.
FROM python:3.11

# Run as an unprivileged user with UID 1000 instead of root.
RUN useradd -m -u 1000 user
USER user
# pip installs console scripts (e.g. uvicorn) into the user's local bin dir.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Copy and install the requirements before the rest of the source so the
# dependency layer is reused when only application code changes.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app

# Serve the FastAPI object "app" from app/app.py on all interfaces, port 7860.
CMD ["uvicorn", "app.app:app", "--host", "0.0.0.0", "--port", "7860"]
app/__init__.py ADDED
File without changes
app/app.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ from typing import Dict, List
4
+ from app.model import RoommateMatcher
5
+
6
import logging

logger = logging.getLogger(__name__)

# ASGI application object; the Dockerfile starts it as "app.app:app".
app = FastAPI()
# One matcher shared by every request: constructing it loads the sentence
# embedding model, so it is built once at import time rather than per call.
matcher = RoommateMatcher()


class MatchRequest(BaseModel):
    # Request body of POST /match. Both fields are free-form dicts; the keys
    # the matcher reads are 'personal_description', 'occupation',
    # 'social_preference', 'financials', 'location', 'budget_min' and
    # 'budget_max'.
    current_user: Dict
    other_users: List[Dict]


@app.post("/match")
def match(request: MatchRequest):
    """Rank ``other_users`` by similarity to ``current_user``.

    Returns ``{"matches": [...]}`` on success. Any failure inside the matcher
    is reported to the client as ``{"error": "<message>"}`` (response shape
    kept for existing callers) and is now also logged with its traceback, so
    failures are no longer silently swallowed on the server side.
    """
    try:
        result = matcher.predict(request.current_user, request.other_users)
    except Exception as e:
        # Top-level request boundary: record the full traceback, then answer
        # with the same error payload clients already handle.
        logger.exception("Roommate matching failed")
        return {"error": str(e)}
    return {"matches": result}
app/model.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model.py
2
+ import numpy as np
3
+ import pandas as pd
4
+ from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ from sentence_transformers import SentenceTransformer
7
+ from typing import Dict, List
8
+
9
class RoommateMatcher:
    """Rank candidate roommates by similarity to a given user.

    Every user is mapped to one feature vector made of three blocks, each
    scaled to unit length and then weighted:

    * a sentence embedding of the free-text fields,
    * a one-hot encoding of ``financials``,
    * the min-max scaled ``location`` / ``budget_min`` / ``budget_max`` values.

    The query user and the candidates go through the same feature builder, so
    both sides always use identical block weights.
    """

    # Relative weight of each feature block. The same values must be applied
    # to the query and to the candidates, otherwise two identical profiles do
    # not reach a similarity of 100.
    # NOTE(review): the previous code used 0.6/0.1/0.3 for candidates but
    # 0.6/0.2/0.2 for the query user; the candidate-side values are kept
    # here. Confirm the intended weighting.
    TEXT_WEIGHT = 0.6
    FINANCIAL_WEIGHT = 0.1
    NUMERIC_WEIGHT = 0.3

    # Maximum number of matches returned by predict().
    MAX_MATCHES = 10

    def __init__(self):
        self.text_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.financial_encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
        self.scaler = MinMaxScaler()
        self.is_fitted = False

    def predict(self, current_user: Dict, other_users: List[Dict]) -> List[Dict]:
        """Return up to ``MAX_MATCHES`` candidates, best match first.

        Args:
            current_user: Profile to match against. Must contain
                ``financials``, ``location``, ``budget_min`` and
                ``budget_max``; the text fields are optional.
            other_users: Candidate profiles with the same keys.

        Returns:
            The candidate records, each extended with ``combined_text`` and a
            ``similarity`` score in percent (rounded to two decimals), sorted
            by descending similarity. An empty candidate list yields ``[]``.

        Raises:
            KeyError: If a required key is missing from a profile.
        """
        if not other_users:
            # Nothing to rank; also avoids fitting/transforming empty input.
            return []

        # NOTE(review): the encoders are fitted only once, on the candidates
        # of the first call, and reused afterwards. Confirm that reusing this
        # fit for later candidate sets is intended.
        if not self.is_fitted:
            self._fit_encoders(other_users)

        candidate_texts = [self._profile_text(user) for user in other_users]
        candidates = self._feature_matrix(other_users, candidate_texts)
        query = self._feature_matrix(
            [current_user], [self._profile_text(current_user)]
        )

        others_df = pd.DataFrame(other_users)
        others_df['combined_text'] = candidate_texts
        others_df['similarity'] = np.round(
            cosine_similarity(query, candidates)[0] * 100, 2
        )

        ranked = others_df.sort_values('similarity', ascending=False)
        return ranked.head(self.MAX_MATCHES).to_dict('records')

    def _feature_matrix(self, users: List[Dict], texts: List[str]) -> np.ndarray:
        """Build the weighted feature matrix (one row per user)."""
        text_block = self._unit_rows(self.text_model.encode(texts))

        financials = np.array([u['financials'] for u in users]).reshape(-1, 1)
        fin_block = self._unit_rows(self.financial_encoder.transform(financials))

        numbers = np.array(
            [[*u['location'], u['budget_min'], u['budget_max']] for u in users],
            dtype=float,
        )
        num_block = self._unit_rows(self.scaler.transform(numbers))

        return np.hstack([
            text_block * self.TEXT_WEIGHT,
            fin_block * self.FINANCIAL_WEIGHT,
            num_block * self.NUMERIC_WEIGHT,
        ])

    @staticmethod
    def _unit_rows(matrix: np.ndarray) -> np.ndarray:
        """Scale each row to unit length, leaving all-zero rows as zeros.

        A zero row occurs for a ``financials`` value unknown to the encoder
        or for numeric values that scale to 0; dividing by its norm would
        produce NaN and make the similarity computation fail.
        """
        matrix = np.asarray(matrix, dtype=float)
        norms = np.linalg.norm(matrix, axis=1, keepdims=True)
        return np.divide(matrix, norms, out=np.zeros_like(matrix), where=norms > 0)

    @staticmethod
    def _profile_text(user: Dict) -> str:
        """Join the free-text fields of a profile into one string.

        Missing or ``None`` fields are skipped instead of being rendered as
        the literal text "None"/"nan".
        """
        preferences = user.get('social_preference') or []
        if isinstance(preferences, str):
            # A bare string would otherwise be split into single characters.
            preferences = [preferences]
        parts = [
            user.get('personal_description'),
            user.get('occupation'),
            *preferences,
        ]
        words = [str(part) for part in parts if part is not None]
        return " ".join(word for word in words if word)

    def _fit_encoders(self, users: List[Dict]):
        """Fit the one-hot encoder and the scaler on ``users``."""
        financials = np.array([u['financials'] for u in users]).reshape(-1, 1)
        numbers = np.array(
            [[*u['location'], u['budget_min'], u['budget_max']] for u in users],
            dtype=float,
        )
        self.financial_encoder.fit(financials)
        self.scaler.fit(numbers)
        self.is_fitted = True
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ pydantic
4
+ numpy
5
+ pandas
6
+ sentence-transformers>=2.2.0
7
+ scikit-learn>=1.0.0