bharatverse11 commited on
Commit
d209b97
·
verified ·
1 Parent(s): 9d67418

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +23 -0
  2. app.py +593 -0
  3. llm_engine.py +124 -0
  4. requirements.txt +12 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use official Python 3.9 slim image
2
+ FROM python:3.9-slim
3
+
4
+ # Set working directory
5
+ WORKDIR /app
6
+
7
+ # Install system dependencies
8
+ RUN apt-get update && apt-get install -y --no-install-recommends \
9
+ build-essential \
10
+ && rm -rf /var/lib/apt/lists/*
11
+
12
+ # Copy requirements first for caching
13
+ COPY requirements.txt .
14
+ RUN pip install --no-cache-dir -r requirements.txt
15
+
16
+ # Copy application code
17
+ COPY . .
18
+
19
+ # Expose port (FastAPI defaults to 8000, but we can configure this)
20
+ EXPOSE 7860
21
+
22
+ # Run the application
23
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,593 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MovieRec AI - Production FastAPI Backend for Hugging Face Spaces
3
+ ================================================================
4
+ Uses trained embeddings from Kaggle for real recommendations.
5
+ """
6
+
7
+ import os
8
+ import json
9
+ import logging
10
+ from typing import List
11
+ from datetime import datetime
12
+ import time
13
+
14
+ from fastapi import FastAPI, HTTPException, Query
15
+ from fastapi.middleware.cors import CORSMiddleware
16
+ from pydantic import BaseModel
17
+ import numpy as np
18
+ import faiss
19
+
20
+ # Configure logging
21
+ logging.basicConfig(level=logging.INFO)
22
+ logger = logging.getLogger(__name__)
23
+
24
+ # Initialize FastAPI
25
+ app = FastAPI(
26
+ title="MovieRec AI",
27
+ description="FAANG-Grade Movie Recommendation API - Trained on MovieLens",
28
+ version="2.0.0",
29
+ docs_url="/docs",
30
+ redoc_url="/redoc"
31
+ )
32
+
33
+ # CORS for frontend access
34
+ app.add_middleware(
35
+ CORSMiddleware,
36
+ allow_origins=["*"],
37
+ allow_credentials=True,
38
+ allow_methods=["*"],
39
+ allow_headers=["*"],
40
+ )
41
+
42
+ # ============================================
43
+ # Load Trained Embeddings & FAISS Index
44
+ # ============================================
45
+
46
+ # Paths to trained files (upload these to HF Space)
47
+ ITEM_EMB_PATH = "item_embeddings.npy"
48
+ USER_EMB_PATH = "user_embeddings.npy"
49
+ INDEX_PATH = "production.index"
50
+ METADATA_PATH = "metadata.json"
51
+ MOVIES_PATH = "movies.json"
52
+
53
+ # Global variables
54
+ item_embeddings = None
55
+ user_embeddings = None
56
+ faiss_index = None
57
+ metadata = {}
58
+ movies_data = {}
59
+
60
+
61
+ def load_embeddings():
62
+ """Load embeddings and FAISS index at startup."""
63
+ global item_embeddings, user_embeddings, faiss_index, metadata, movies_data
64
+
65
+ try:
66
+ # Load metadata
67
+ if os.path.exists(METADATA_PATH):
68
+ with open(METADATA_PATH, 'r') as f:
69
+ metadata = json.load(f)
70
+ logger.info(f"Loaded metadata: {metadata}")
71
+
72
+ # Load movies metadata
73
+ if os.path.exists(MOVIES_PATH):
74
+ with open(MOVIES_PATH, 'r', encoding='utf-8') as f:
75
+ movies_data = json.load(f)
76
+ logger.info(f"Loaded {len(movies_data)} movie metadata entries")
77
+
78
+ # Load item embeddings
79
+ if os.path.exists(ITEM_EMB_PATH):
80
+ item_embeddings = np.load(ITEM_EMB_PATH, mmap_mode='r').astype(np.float32)
81
+ logger.info(f"Loaded item embeddings: {item_embeddings.shape}")
82
+
83
+ # Load user embeddings
84
+ if os.path.exists(USER_EMB_PATH):
85
+ user_embeddings = np.load(USER_EMB_PATH, mmap_mode='r').astype(np.float32)
86
+ logger.info(f"Loaded user embeddings: {user_embeddings.shape}")
87
+
88
+ # Load FAISS index
89
+ if os.path.exists(INDEX_PATH):
90
+ faiss_index = faiss.read_index(INDEX_PATH)
91
+ logger.info(f"Loaded FAISS index with {faiss_index.ntotal} vectors")
92
+ elif item_embeddings is not None:
93
+ # Build index if not provided
94
+ logger.info("Building FAISS index from embeddings...")
95
+ dim = item_embeddings.shape[1]
96
+ faiss_index = faiss.IndexFlatIP(dim)
97
+ normalized = item_embeddings.copy()
98
+ faiss.normalize_L2(normalized)
99
+ faiss_index.add(normalized)
100
+ logger.info(f"Built FAISS index with {faiss_index.ntotal} vectors")
101
+
102
+ return True
103
+ except Exception as e:
104
+ logger.error(f"Error loading embeddings: {e}")
105
+ return False
106
+
107
+
108
+ # ============================================
109
+ # Response Models
110
+ # ============================================
111
+
112
+ class ItemRecommendation(BaseModel):
113
+ item_id: int
114
+ score: float
115
+
116
+
117
+ class RecommendationResponse(BaseModel):
118
+ user_id: int
119
+ recommendations: List[ItemRecommendation]
120
+ num_candidates: int
121
+ latency_ms: float
122
+
123
+
124
+ class SimilarItemsResponse(BaseModel):
125
+ item_id: int
126
+ similar_items: List[ItemRecommendation]
127
+ latency_ms: float
128
+
129
+
130
+ class HealthResponse(BaseModel):
131
+ status: str
132
+ timestamp: str
133
+ num_items: int
134
+ num_users: int
135
+ embedding_dim: int
136
+ index_loaded: bool
137
+
138
+
139
+ # ============================================
140
+ # API Endpoints
141
+ # ============================================
142
+
143
+ @app.get("/", tags=["Info"])
144
+ async def root():
145
+ """API information."""
146
+ return {
147
+ "name": "MovieRec AI",
148
+ "version": "2.0.0 (Production)",
149
+ "description": "FAANG-Grade Movie Recommendations with trained embeddings",
150
+ "endpoints": {
151
+ "health": "/health",
152
+ "recommend": "/recommend/{user_id}",
153
+ "similar": "/similar/{item_id}",
154
+ "docs": "/docs"
155
+ },
156
+ "model": {
157
+ "num_items": metadata.get("num_items", 0),
158
+ "num_users": metadata.get("num_users", 0),
159
+ "embedding_dim": metadata.get("embedding_dim", 64)
160
+ }
161
+ }
162
+
163
+
164
+ @app.get("/health", response_model=HealthResponse, tags=["Health"])
165
+ async def health_check():
166
+ """Health check endpoint."""
167
+ return HealthResponse(
168
+ status="healthy" if faiss_index is not None else "degraded",
169
+ timestamp=datetime.now().isoformat(),
170
+ num_items=item_embeddings.shape[0] if item_embeddings is not None else 0,
171
+ num_users=user_embeddings.shape[0] if user_embeddings is not None else 0,
172
+ embedding_dim=metadata.get("embedding_dim", 64),
173
+ index_loaded=faiss_index is not None
174
+ )
175
+
176
+
177
+ @app.get("/recommend/{user_id}", response_model=RecommendationResponse, tags=["Recommendations"])
178
+ async def get_recommendations(
179
+ user_id: int,
180
+ k: int = Query(default=10, ge=1, le=100, description="Number of recommendations")
181
+ ):
182
+ """Get personalized movie recommendations for a user."""
183
+ start = time.time()
184
+
185
+ # Validate user_id
186
+ if user_embeddings is None:
187
+ raise HTTPException(status_code=503, detail="User embeddings not loaded")
188
+
189
+ if user_id < 0 or user_id >= user_embeddings.shape[0]:
190
+ raise HTTPException(status_code=404, detail=f"User {user_id} not found. Valid range: 0-{user_embeddings.shape[0]-1}")
191
+
192
+ if faiss_index is None:
193
+ raise HTTPException(status_code=503, detail="FAISS index not loaded")
194
+
195
+ # Get user embedding and normalize
196
+ user_emb = user_embeddings[user_id:user_id+1].copy()
197
+ faiss.normalize_L2(user_emb)
198
+
199
+ # Search FAISS index
200
+ distances, indices = faiss_index.search(user_emb, k)
201
+
202
+ latency_ms = (time.time() - start) * 1000
203
+
204
+ return RecommendationResponse(
205
+ user_id=user_id,
206
+ recommendations=[
207
+ ItemRecommendation(item_id=int(idx), score=round(float(dist), 4))
208
+ for idx, dist in zip(indices[0], distances[0])
209
+ ],
210
+ num_candidates=faiss_index.ntotal,
211
+ latency_ms=round(latency_ms, 2)
212
+ )
213
+
214
+
215
+ @app.get("/similar/{item_id}", response_model=SimilarItemsResponse, tags=["Recommendations"])
216
+ async def get_similar_items(
217
+ item_id: int,
218
+ k: int = Query(default=10, ge=1, le=100, description="Number of similar items")
219
+ ):
220
+ """Get movies similar to a given movie."""
221
+ start = time.time()
222
+
223
+ # Validate item_id
224
+ if item_embeddings is None:
225
+ raise HTTPException(status_code=503, detail="Item embeddings not loaded")
226
+
227
+ if item_id < 0 or item_id >= item_embeddings.shape[0]:
228
+ raise HTTPException(status_code=404, detail=f"Item {item_id} not found. Valid range: 0-{item_embeddings.shape[0]-1}")
229
+
230
+ if faiss_index is None:
231
+ raise HTTPException(status_code=503, detail="FAISS index not loaded")
232
+
233
+ # Get item embedding and normalize
234
+ item_emb = item_embeddings[item_id:item_id+1].copy()
235
+ faiss.normalize_L2(item_emb)
236
+
237
+ # Search FAISS index (k+1 because the item itself will be first result)
238
+ distances, indices = faiss_index.search(item_emb, k + 1)
239
+
240
+ # Filter out the query item itself
241
+ results = [(idx, dist) for idx, dist in zip(indices[0], distances[0]) if idx != item_id][:k]
242
+
243
+ latency_ms = (time.time() - start) * 1000
244
+
245
+ return SimilarItemsResponse(
246
+ item_id=item_id,
247
+ similar_items=[
248
+ ItemRecommendation(item_id=int(idx), score=round(float(dist), 4))
249
+ for idx, dist in results
250
+ ],
251
+ latency_ms=round(latency_ms, 2)
252
+ )
253
+
254
+
255
+ @app.get("/stats", tags=["Info"])
256
+ async def get_stats():
257
+ """Get system statistics."""
258
+ return {
259
+ "embeddings": {
260
+ "items": item_embeddings.shape if item_embeddings is not None else None,
261
+ "users": user_embeddings.shape if user_embeddings is not None else None
262
+ },
263
+ "index": {
264
+ "type": type(faiss_index).__name__ if faiss_index else None,
265
+ "total_vectors": faiss_index.ntotal if faiss_index else 0
266
+ },
267
+ "metadata": metadata,
268
+ "movies_loaded": len(movies_data)
269
+ }
270
+
271
+
272
+ @app.get("/movies", tags=["Movies"])
273
+ async def get_all_movies():
274
+ """Get all movie metadata. Use for frontend caching."""
275
+ return {
276
+ "count": len(movies_data),
277
+ "movies": movies_data
278
+ }
279
+
280
+
281
+ @app.get("/movie/{item_id}", tags=["Movies"])
282
+ async def get_movie(item_id: int):
283
+ """Get metadata for a specific movie by encoded ID."""
284
+ item_key = str(item_id)
285
+ if item_key not in movies_data:
286
+ return {
287
+ "item_id": item_id,
288
+ "title": f"Movie #{item_id}",
289
+ "year": None,
290
+ "genres": [],
291
+ "tmdbId": None,
292
+ "found": False
293
+ }
294
+
295
+ movie = movies_data[item_key]
296
+ return {
297
+ "item_id": item_id,
298
+ **movie,
299
+ "found": True
300
+ }
301
+
302
+
303
+ @app.get("/movies/search", tags=["Movies"])
304
+ async def search_movies(
305
+ q: str = Query(..., min_length=2, description="Search query"),
306
+ limit: int = Query(default=20, ge=1, le=50)
307
+ ):
308
+ """Search movies by title, tags, and genres with weighted scoring."""
309
+ q_lower = q.lower()
310
+ scored_results = []
311
+
312
+ # Log search event to Kafka (Production Event)
313
+ from kafka_utils import kafka_producer
314
+ kafka_producer.send_event("user_searches", "SEARCH_QUERY", {"query": q})
315
+
316
+ for item_id, movie in movies_data.items():
317
+ score = 0
318
+ title = movie.get("title", "").lower()
319
+ tags = [t.lower() for t in movie.get("tags", [])]
320
+ genres = [g.lower() for g in movie.get("genres", [])]
321
+
322
+ # 1. Title Match (Highest Priority)
323
+ if q_lower == title:
324
+ score += 100
325
+ elif q_lower in title:
326
+ score += 50
327
+
328
+ # 2. Tag Match (Medium Priority)
329
+ # Check if query is in tags OR tag is in query (e.g. "hindi movies" -> tag "hindi")
330
+ query_tokens = set(q_lower.split())
331
+ for tag in tags:
332
+ if q_lower == tag:
333
+ score += 30
334
+ elif q_lower in tag:
335
+ score += 20
336
+ elif tag in q_lower and len(tag) > 2: # Avoid matching short words like "in"
337
+ score += 25
338
+
339
+ # Check for token overlap
340
+ if tag in query_tokens:
341
+ score += 15
342
+
343
+ # 3. Genre Match (Lowest Priority)
344
+ for genre in genres:
345
+ if q_lower == genre:
346
+ score += 20
347
+ elif genre in q_lower:
348
+ score += 20
349
+
350
+ if score > 0:
351
+ scored_results.append({
352
+ "item_id": int(item_id),
353
+ "score": score,
354
+ **movie
355
+ })
356
+
357
+ # Sort by score descending
358
+ scored_results.sort(key=lambda x: x["score"], reverse=True)
359
+
360
+ return {
361
+ "query": q,
362
+ "count": len(scored_results),
363
+ "results": scored_results[:limit]
364
+ }
365
+
366
+
367
+ # ============================================
368
+ # Redis Configuration
369
+ # ============================================
370
+ import redis
371
+
372
+ REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0")
373
+ redis_client = None
374
+
375
+ try:
376
+ redis_client = redis.from_url(REDIS_URL, decode_responses=True)
377
+ except Exception as e:
378
+ logger.warning(f"Redis connection failed: {e}. Caching will be disabled.")
379
+
380
+
381
+ # ============================================
382
+ # Startup Event
383
+ # ============================================
384
+
385
+ @app.on_event("startup")
386
+ async def startup():
387
+ logger.info("🎬 MovieRec AI starting...")
388
+ success = load_embeddings()
389
+ if success:
390
+ logger.info("✅ API ready with trained embeddings!")
391
+ else:
392
+ logger.warning("⚠️ Running without embeddings - upload trained files!")
393
+
394
+ # Check Redis
395
+ if redis_client:
396
+ try:
397
+ redis_client.ping()
398
+ logger.info("✅ Redis connected successfully!")
399
+ except Exception as e:
400
+ logger.warning(f"⚠️ Redis ping failed: {e}")
401
+
402
+ # ... (Rest of the file remains similar, but we update endpoints)
403
+
404
+ # Cache for top 50 movies to avoid re-sorting every request
405
+ # Replaced global list with Redis pattern
406
+ @app.get("/movies/top50", tags=["Movies"])
407
+ async def get_top_50_movies():
408
+ """Get top 50 movies by average rating (min 1000 votes). Cached in Redis for 1 hour."""
409
+
410
+ # Try Redis first
411
+ if redis_client:
412
+ try:
413
+ cached = redis_client.get("top_50_movies")
414
+ if cached:
415
+ results = json.loads(cached)
416
+ return {"count": len(results), "results": results, "source": "redis"}
417
+ except Exception as e:
418
+ logger.error(f"Redis get error: {e}")
419
+
420
+ # Filter and sort movies (Fallthrough logic)
421
+ valid_movies = []
422
+ for item_id, movie in movies_data.items():
423
+ if movie.get("vote_count", 0) >= 1000:
424
+ valid_movies.append({
425
+ "item_id": int(item_id),
426
+ **movie
427
+ })
428
+
429
+ # Sort by rating descending
430
+ valid_movies.sort(key=lambda x: x.get("vote_average", 0), reverse=True)
431
+
432
+ # Top 50
433
+ top_50 = valid_movies[:50]
434
+
435
+ # Save to Redis
436
+ if redis_client:
437
+ try:
438
+ redis_client.setex("top_50_movies", 3600, json.dumps(top_50)) # Cache for 1 hour
439
+ except Exception as e:
440
+ logger.error(f"Redis set error: {e}")
441
+
442
+ return {
443
+ "count": len(top_50),
444
+ "results": top_50,
445
+ "source": "database"
446
+ }
447
+
448
+
449
+ @app.get("/movies/tv", tags=["Movies"])
450
+ async def get_tv_movies():
451
+ """Get popular TV shows from the dataset. Cached in Redis for 1 hour."""
452
+
453
+ # Try Redis first
454
+ if redis_client:
455
+ try:
456
+ cached = redis_client.get("tv_movies")
457
+ if cached:
458
+ results = json.loads(cached)
459
+ return {"count": len(results), "results": results, "source": "redis"}
460
+ except Exception as e:
461
+ logger.error(f"Redis get error: {e}")
462
+
463
+ # Filter for TV shows
464
+ tv_shows = []
465
+ for item_id, movie in movies_data.items():
466
+ if movie.get("media_type") == "tv":
467
+ tv_shows.append({
468
+ "item_id": int(item_id),
469
+ **movie
470
+ })
471
+
472
+ # Sort by rating descending
473
+ tv_shows.sort(key=lambda x: x.get("vote_average", 0), reverse=True)
474
+
475
+ # Save to Redis
476
+ if redis_client:
477
+ try:
478
+ redis_client.setex("tv_movies", 3600, json.dumps(tv_shows))
479
+ except Exception as e:
480
+ logger.error(f"Redis set error: {e}")
481
+
482
+ return {
483
+ "count": len(tv_shows),
484
+ "results": tv_shows,
485
+ "source": "database"
486
+ }
487
+
488
+
489
+ # ============================================
490
+ # Chat & LLM Integration
491
+ # ============================================
492
+ from llm_engine import LLMEngine
493
+ llm_engine = LLMEngine()
494
+
495
+ class ChatRequest(BaseModel):
496
+ message: str
497
+ session_id: str
498
+
499
+ @app.post("/chat/message", tags=["Chat"])
500
+ async def chat_message(request: ChatRequest):
501
+ """
502
+ Handle chat interactions:
503
+ 1. Retrieve history from Redis
504
+ 2. Parse intent (filters) with LLM
505
+ 3. Search movies with filters
506
+ 4. Generate response with LLM
507
+ 5. Update history
508
+ """
509
+
510
+ # 1. Get History
511
+ history = []
512
+ if redis_client:
513
+ try:
514
+ cached_history = redis_client.get(f"chat:{request.session_id}")
515
+ if cached_history:
516
+ history = json.loads(cached_history)
517
+ except Exception as e:
518
+ logger.error(f"Redis get chat history error: {e}")
519
+
520
+ # Add user message to history
521
+ history.append({"role": "user", "content": request.message})
522
+
523
+ # 2. Parse Intent
524
+ filters = llm_engine.parse_intent(request.message)
525
+ logger.info(f"Extracted filters: {filters}")
526
+
527
+ # 3. Search Movies logic (simplified version of search_movies)
528
+ # We will score movies based on filters + simple text match if relevant
529
+ # For now, just simplistic filter application on top of popularity or relevance
530
+
531
+ candidates = []
532
+
533
+ # Simple candidate generation strategy:
534
+ # If filters exist, filter all movies. If not, maybe just use top 20 popular?
535
+ # Or actually run the search logic if query is present?
536
+ # Let's simple reuse search logic manually or call internal function?
537
+ # We'll do a custom filter pass:
538
+
539
+ for item_id, movie in movies_data.items():
540
+ score = 0
541
+
542
+ # Filter checks
543
+ if "year_min" in filters and movie.get("year", 0) < filters["year_min"]: continue
544
+ if "year_max" in filters and movie.get("year", 0) > filters["year_max"]: continue
545
+
546
+ # Genre filter (any match)
547
+ if "genres" in filters:
548
+ movie_genres = [g.lower() for g in movie.get("genres", [])]
549
+ target_genres = [g.lower() for g in filters["genres"]]
550
+ if not any(g in movie_genres for g in target_genres):
551
+ continue
552
+
553
+ # Duration check
554
+ # (Assuming we had runtime in metadata, if not, skip)
555
+
556
+ # Scoring: Text match or Popularity
557
+ # If the user query has "action", we filtered.
558
+ # So now we just want good movies.
559
+ score = movie.get("vote_average", 0) + (movie.get("vote_count", 0) / 10000)
560
+
561
+ candidates.append({
562
+ "item_id": int(item_id),
563
+ "score": score,
564
+ **movie
565
+ })
566
+
567
+ # Sort and take top 5
568
+ candidates.sort(key=lambda x: x["score"], reverse=True)
569
+ top_candidates = candidates[:5]
570
+
571
+ # 4. Generate Response
572
+ response_text = llm_engine.generate_response(request.message, top_candidates, history)
573
+
574
+ # Add assistant response to history
575
+ history.append({"role": "assistant", "content": response_text})
576
+
577
+ # 5. Save History (limit to last 10 turns)
578
+ if redis_client:
579
+ try:
580
+ redis_client.setex(f"chat:{request.session_id}", 3600, json.dumps(history[-10:]))
581
+ except Exception as e:
582
+ logger.error(f"Redis set chat history error: {e}")
583
+
584
+ return {
585
+ "response": response_text,
586
+ "recommendations": top_candidates
587
+ }
588
+
589
+ @app.delete("/chat/history/{session_id}", tags=["Chat"])
590
+ async def clear_history(session_id: str):
591
+ if redis_client:
592
+ redis_client.delete(f"chat:{session_id}")
593
+ return {"status": "cleared"}
llm_engine.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import logging
4
+ from typing import List, Dict, Any, Optional
5
+ from huggingface_hub import InferenceClient
6
+
7
+ # Configure logging
8
+ logger = logging.getLogger(__name__)
9
+
10
+ class LLMEngine:
11
+ """
12
+ Handles interactions with Hugging Face Inference API for:
13
+ 1. Intent Parsing (Filter extraction)
14
+ 2. Response Generation (RAG)
15
+ """
16
+
17
+ def __init__(self):
18
+ self.token = os.environ.get("HF_TOKEN")
19
+ if not self.token:
20
+ logger.warning("⚠️ HF_TOKEN not found. Chat features will be disabled.")
21
+ self.client = None
22
+ else:
23
+ # Using Mistral-7B-Instruct-v0.2 for good instruction following
24
+ self.model_id = "mistralai/Mistral-7B-Instruct-v0.2"
25
+ self.client = InferenceClient(model=self.model_id, token=self.token)
26
+ logger.info(f"✅ LLM Engine initialized with {self.model_id}")
27
+
28
+ def parse_intent(self, user_query: str) -> Dict[str, Any]:
29
+ """
30
+ Extracts search filters from natural language query.
31
+ Returns a dictionary of filters (year_min, year_max, genres, etc.)
32
+ """
33
+ if not self.client:
34
+ return {}
35
+
36
+ system_prompt = """
37
+ You are a movie recommendation assistant. Your goal is to extract structured search filters from the user's query.
38
+ Return ONLY a JSON object with the following keys if applicable:
39
+ - "genres": list of strings (e.g. ["Action", "Comedy"])
40
+ - "year_min": int (e.g. 1990)
41
+ - "year_max": int (e.g. 1999)
42
+ - "duration_max": int (in minutes, e.g. 120)
43
+
44
+ Example: "Recommend a 90s action movie under 2 hours"
45
+ Output: {"genres": ["Action"], "year_min": 1990, "year_max": 1999, "duration_max": 120}
46
+
47
+ If no filters are found, return {}.
48
+ """
49
+
50
+ prompt = f"[INST] {system_prompt}\n\nQuery: {user_query} [/INST]"
51
+
52
+ try:
53
+ response = self.client.text_generation(
54
+ prompt,
55
+ max_new_tokens=150,
56
+ temperature=0.1, # Low temp for deterministic JSON
57
+ return_full_text=False
58
+ )
59
+
60
+ # Simple cleanup to find JSON
61
+ import re
62
+ json_match = re.search(r"\{.*\}", response.replace("\n", ""), re.DOTALL)
63
+ if json_match:
64
+ filters = json.loads(json_match.group())
65
+ logger.info(f"Parsed filters: {filters}")
66
+ return filters
67
+ return {}
68
+
69
+ except Exception as e:
70
+ logger.error(f"Error parsing intent: {e}")
71
+ return {}
72
+
73
+ def generate_response(self, user_query: str, candidates: List[Dict], history: List[Dict]) -> str:
74
+ """
75
+ Generates a natural language response based on the search results.
76
+ """
77
+ if not self.client:
78
+ return "I'm sorry, I can't chat right now because my AI brain is missing (HF_TOKEN not set)."
79
+
80
+ # Format candidates into a context string
81
+ context_str = ""
82
+ for i, movie in enumerate(candidates[:5]): # Top 5 context
83
+ title = movie.get("title", "Unknown")
84
+ year = movie.get("year", "N/A")
85
+ genes = ", ".join(movie.get("genres", []))
86
+ overview = movie.get("overview", "")[:100] + "..." if movie.get("overview") else "No description."
87
+ context_str += f"{i+1}. {title} ({year}) - {genes}: {overview}\n"
88
+
89
+ system_prompt = """
90
+ You are a friendly and knowledgeable movie assistant.
91
+ Answer the user's request using the provided movie context.
92
+ - Be conversational.
93
+ - Briefly mention why you picked these movies based on the user's criteria.
94
+ - Do NOT make up movies. Use ONLY the provided context.
95
+ """
96
+
97
+ # Format history (last 2 turns)
98
+ history_str = ""
99
+ for msg in history[-2:]:
100
+ role = "User" if msg["role"] == "user" else "Assistant"
101
+ history_str += f"{role}: {msg['content']}\n"
102
+
103
+ prompt = f"""[INST] {system_prompt}
104
+
105
+ Context:
106
+ {context_str}
107
+
108
+ History:
109
+ {history_str}
110
+
111
+ User: {user_query}
112
+ [/INST]"""
113
+
114
+ try:
115
+ response = self.client.text_generation(
116
+ prompt,
117
+ max_new_tokens=400,
118
+ temperature=0.7,
119
+ return_full_text=False
120
+ )
121
+ return response.strip()
122
+ except Exception as e:
123
+ logger.error(f"Error generating response: {e}")
124
+ return "I found some movies but had trouble explaining them. Please check the recommendations list!"
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.109.2
2
+ uvicorn==0.27.1
3
+ numpy==1.26.4
4
+ pydantic==2.6.1
5
+ scikit-learn==1.4.1.post1
6
+ faiss-cpu==1.8.0
7
+ requests==2.31.0
8
+ python-multipart==0.0.9
9
+ redis==5.0.1
10
+ kafka-python==2.0.2
11
+ mlflow==2.10.2
12
+ huggingface_hub>=0.19.0