junaid17 committed (verified)
Commit d8504c1 · 1 Parent(s): 0354cd5

Upload 12 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+artifacts/movie_faiss.index filter=lfs diff=lfs merge=lfs -text
ReviewSentiment.py ADDED
@@ -0,0 +1,263 @@
+import torch
+import torch.nn as nn
+import numpy as np
+import re
+
+# =========================== Device ===========================
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"Model running on: {DEVICE}")
+
+# =========================== Tokenizer ===========================
+def simple_tokenize(text):
+    return text.split()
+
+# =========================== Model ===========================
+class GoEmotionsLSTM(nn.Module):
+    def __init__(self, vocab_size, embed_dim=200, hidden_dim=256, num_classes=28, num_layers=2):
+        super().__init__()
+
+        self.embeddings = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
+
+        self.lstm = nn.LSTM(
+            input_size=embed_dim,
+            hidden_size=hidden_dim,
+            num_layers=num_layers,
+            batch_first=True,
+            dropout=0.2,
+            bidirectional=True
+        )
+
+        self.fc = nn.Linear(hidden_dim * 2, num_classes)
+        self.dropout = nn.Dropout(0.2)
+
+    def forward(self, x):
+        x = self.embeddings(x)
+        _, (h, _) = self.lstm(x)
+
+        h_forward = h[-2]
+        h_backward = h[-1]
+        h_cat = torch.cat((h_forward, h_backward), dim=1)
+
+        h_cat = self.dropout(h_cat)
+        out = self.fc(h_cat)
+        return out
+
+# =========================== Load Model (ONCE) ===========================
+def load_goemotion_model(path="artifacts/goemotions_bilstm_checkpoint.pth"):
+    checkpoint = torch.load(path, map_location=DEVICE)
+
+    vocab = checkpoint["vocab"]
+    max_len = checkpoint["max_len"]
+
+    model = GoEmotionsLSTM(vocab_size=len(vocab))
+    model.load_state_dict(checkpoint["model_state"])
+    model.to(DEVICE)
+    model.eval()
+
+    return model, vocab, max_len
+
+# Load once at startup
+MODEL, VOCAB, MAX_LEN = load_goemotion_model()
+
+# =========================== Emotion Map ===========================
+EMOTION_MAP = [
+    "admiration","amusement","anger","annoyance","approval","caring","confusion",
+    "curiosity","desire","disappointment","disapproval","disgust","embarrassment",
+    "excitement","fear","gratitude","grief","joy","love","nervousness","optimism",
+    "pride","realization","relief","remorse","sadness","surprise","neutral"
+]
+
+# =========================== Sentiment Groups ===========================
+POSITIVE_EMOTIONS = {
+    "admiration", "amusement", "approval", "caring", "desire", "excitement",
+    "gratitude", "joy", "love", "optimism", "pride", "relief"
+}
+
+NEGATIVE_EMOTIONS = {
+    "anger", "annoyance", "disappointment", "disapproval", "disgust",
+    "embarrassment", "fear", "grief", "nervousness", "remorse", "sadness"
+}
+
+NEUTRAL_EMOTIONS = {
+    "confusion", "curiosity", "realization", "surprise", "neutral"
+}
+
+# =========================== Preprocessing ===========================
+_CLEAN_RE = re.compile(r'[^a-z0-9\s]+')
+
+def clean_text(text: str) -> str:
+    text = text.lower()
+    text = _CLEAN_RE.sub(" ", text)
+    return " ".join(text.split())
+
+# =========================== Core Prediction ===========================
+@torch.inference_mode()
+def predict_sentiment(text: str):
+    text = clean_text(text)
+    tokens = simple_tokenize(text)
+
+    seq = [VOCAB.get(tok, 1) for tok in tokens]  # 1 = <UNK>
+
+    if len(seq) < MAX_LEN:
+        seq.extend([VOCAB["<PAD>"]] * (MAX_LEN - len(seq)))
+    else:
+        seq = seq[:MAX_LEN]
+
+    x = torch.tensor(seq, dtype=torch.long, device=DEVICE).unsqueeze(0)
+
+    logits = MODEL(x)
+    probs = torch.sigmoid(logits)[0].cpu().numpy()
+
+    # Aggregate probabilities
+    pos_score = 0.0
+    neg_score = 0.0
+    neu_score = 0.0
+
+    for i, p in enumerate(probs):
+        emotion = EMOTION_MAP[i]
+        if emotion in POSITIVE_EMOTIONS:
+            pos_score += p
+        elif emotion in NEGATIVE_EMOTIONS:
+            neg_score += p
+        elif emotion in NEUTRAL_EMOTIONS:
+            neu_score += p
+
+    scores = {
+        "positive": pos_score,
+        "negative": neg_score,
+        "neutral": neu_score
+    }
+
+    sentiment = max(scores, key=scores.get)
+    confidence = float(scores[sentiment] / (pos_score + neg_score + neu_score + 1e-8))
+
+    return {
+        "sentiment": sentiment,
+        "confidence": round(confidence, 4)
+    }
+
+# =========================== Public Function ===========================
+def find_sentiment(text: str):
+    return predict_sentiment(text)
+
+# =========================== Analyze Sentiment ===========================
+
+def analyze_reviews_sentiment(reviews: list[str]):
+    """
+    reviews: list of review strings
+    returns: percentage distribution
+    """
+
+    total = len(reviews)
+
+    if total == 0:
+        return {
+            "positive": 0.0,
+            "negative": 0.0,
+            "neutral": 0.0
+        }
+
+    counts = {
+        "positive": 0,
+        "negative": 0,
+        "neutral": 0
+    }
+
+    for review in reviews:
+        result = find_sentiment(review)
+        counts[result["sentiment"]] += 1
+
+    percentages = {
+        "positive": round((counts["positive"] / total) * 100, 2),
+        "negative": round((counts["negative"] / total) * 100, 2),
+        "neutral": round((counts["neutral"] / total) * 100, 2)
+    }
+
+    return percentages
+
+
+"""TEST_REVIEWS_50 = [
+    # Positive (1–18)
+    "Absolutely loved this movie, the story and acting were brilliant.",
+    "One of the best films I have seen this year, totally worth it.",
+    "The cinematography was stunning and the soundtrack was perfect.",
+    "I really enjoyed every minute of it, great experience.",
+    "An amazing performance by the lead actor, truly outstanding.",
+    "This movie exceeded my expectations in every way.",
+    "Beautiful storytelling and emotional depth, loved it.",
+    "The direction and screenplay were top-notch.",
+    "A splendid movie, very entertaining and engaging.",
+    "I was smiling the whole time, such a feel-good film.",
+    "The action sequences were incredible and well choreographed.",
+    "A masterpiece, will definitely watch it again.",
+    "The chemistry between the actors was amazing.",
+    "Really inspiring and motivational movie.",
+    "This film made my day, absolutely fantastic.",
+    "Loved the humor and the emotional moments.",
+    "A very satisfying and enjoyable watch.",
+    "Brilliant execution and great visuals.",
+
+    # Negative (19–36)
+    "This movie was a complete waste of time.",
+    "I did not like it at all, very boring and slow.",
+    "The plot made no sense and the acting was bad.",
+    "Terrible screenplay and weak performances.",
+    "I was very disappointed with this film.",
+    "The movie felt too long and dragged a lot.",
+    "Poor direction and horrible editing.",
+    "Not worth the hype, very average experience.",
+    "The story was predictable and dull.",
+    "I regret watching this movie.",
+    "Bad acting and cringe dialogues.",
+    "This film was really annoying to watch.",
+    "Nothing interesting happened in the entire movie.",
+    "The worst movie I have seen in a long time.",
+    "Very weak script and poor execution.",
+    "It was painful to sit through this movie.",
+    "Extremely disappointing and underwhelming.",
+    "The movie failed to impress in any aspect.",
+
+    # Neutral (37–50)
+    "The movie was okay, nothing special.",
+    "It was an average film with decent acting.",
+    "The story was simple and straightforward.",
+    "Some parts were good, some parts were boring.",
+    "It was a one-time watch kind of movie.",
+    "The film was neither good nor bad.",
+    "Decent movie, could have been better.",
+    "The acting was fine and the story was okay.",
+    "Nothing extraordinary, just a regular film.",
+    "It was watchable but not memorable.",
+    "An average experience overall.",
+    "The movie did its job, nothing more.",
+    "It was fine for a weekend watch.",
+    "Neither impressive nor terrible."
+]
+
+def test_50_reviews_sentiment():
+    print("=" * 80)
+    print("TESTING SENTIMENT DISTRIBUTION ON 50 MOVIE REVIEWS")
+    print("=" * 80)
+
+    # Individual predictions (optional but good for debugging)
+    for idx, review in enumerate(TEST_REVIEWS_50, start=1):
+        result = find_sentiment(review)
+        print(f"{idx:02d}. {review}")
+        print(f"    → Sentiment: {result['sentiment'].upper():8} | Confidence: {result['confidence']}")
+        print("-" * 80)
+
+    print("\nAGGREGATED RESULT")
+    print("=" * 80)
+
+    distribution = analyze_reviews_sentiment(TEST_REVIEWS_50)
+
+    print(f"Positive : {distribution['positive']}%")
+    print(f"Negative : {distribution['negative']}%")
+    print(f"Neutral  : {distribution['neutral']}%")
+    print("=" * 80)
+
+
+# Run test
+if __name__ == "__main__":
+    test_50_reviews_sentiment()
+"""
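A minimal usage sketch of this module, assuming the checkpoint at artifacts/goemotions_bilstm_checkpoint.pth is present (MODEL, VOCAB and MAX_LEN are loaded at import time); the printed values are illustrative, not real outputs:

# Usage sketch for ReviewSentiment (illustrative outputs only).
from ReviewSentiment import find_sentiment, analyze_reviews_sentiment

single = find_sentiment("Absolutely loved this movie, the story and acting were brilliant.")
print(single)  # e.g. {'sentiment': 'positive', 'confidence': 0.8123}

batch = analyze_reviews_sentiment([
    "A masterpiece, will definitely watch it again.",
    "This movie was a complete waste of time.",
    "It was an average film with decent acting.",
])
print(batch)   # e.g. {'positive': 33.33, 'negative': 33.33, 'neutral': 33.33}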
app.py ADDED
@@ -0,0 +1,194 @@
+import os
+import requests
+import pandas as pd
+from fastapi import FastAPI, HTTPException, Query
+from fastapi.responses import FileResponse
+from pydantic import BaseModel
+from typing import Literal, List, Optional
+from fastapi.middleware.cors import CORSMiddleware
+
+# --- Import your existing custom modules ---
+from summarise_bot import summarise_movie as workflow
+from prediction_helper import recommend
+from utils import (get_movie_id, get_movie_details, get_movie_reviews, TTS)
+from ReviewSentiment import analyze_reviews_sentiment
+
+# =============================
+# CONFIGURATION
+# =============================
+TMDB_API_KEY = "4ca4d3c95de0c88528c2682781127d55"
+TMDB_BASE_URL = "https://api.themoviedb.org/3"
+TMDB_IMAGE_BASE = "https://image.tmdb.org/t/p/w500"
+
+app = FastAPI(title='Movie Recommendation System', version='2.1')
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# =============================
+# DATA LOADING
+# =============================
+try:
+    # Ensure this matches your actual CSV filename
+    movies_df = pd.read_csv('artifacts/cleaned_movie.csv')
+    ALL_MOVIE_TITLES = movies_df['title'].dropna().unique().tolist()
+    print(f"✅ Loaded {len(ALL_MOVIE_TITLES)} movies for local search.")
+except Exception as e:
+    print(f"⚠️ Warning: Could not load local movie list. ({e})")
+    ALL_MOVIE_TITLES = []
+
+# =============================
+# MODELS
+# =============================
+class RecomendationInput(BaseModel):
+    movie_title: str
+    engine: Literal["embedding", "tfidf", "hybrid"] = "embedding"
+    top_k: int = 5
+
+class MovieInfo(BaseModel):
+    title: str
+    overview: str
+    release_date: str
+    runtime: int | None
+    rating: float
+    vote_count: int
+    genres: list[str]
+    poster: str | None
+    backdrop: str | None
+
+class MovieReviews(BaseModel):
+    title: str
+    num_reviews: int = 50
+
+class WorkflowInput(BaseModel):
+    title: str
+    overview: str
+
+# =============================
+# HELPERS
+# =============================
+def fetch_tmdb(endpoint: str, params: dict | None = None):
+    # Copy the params so a shared/mutable default dict is never modified in place
+    params = dict(params or {})
+    params['api_key'] = TMDB_API_KEY
+    url = f"{TMDB_BASE_URL}{endpoint}"
+    response = requests.get(url, params=params)
+    return response.json() if response.status_code == 200 else None
+
+def format_tmdb_movies(results: list):
+    formatted = []
+    for m in results:
+        formatted.append({
+            "title": m.get("title"),
+            "poster": f"{TMDB_IMAGE_BASE}{m.get('poster_path')}" if m.get('poster_path') else None,
+            "rating": m.get("vote_average"),
+            "release_date": m.get("release_date", "N/A"),
+            "id": m.get("id"),
+            "vote_count": m.get("vote_count")
+        })
+    return formatted
+
+# =============================
+# ENDPOINTS
+# =============================
+
+@app.get('/')
+def status():
+    return {'message': 'API is live', 'movies_loaded': len(ALL_MOVIE_TITLES)}
+
+# --- TRENDING & POPULAR ---
+
+@app.get('/movies/trending')
+def get_trending(time_window: str = "week"):
+    data = fetch_tmdb(f"/trending/movie/{time_window}")
+    if not data:
+        return []
+    return format_tmdb_movies(data.get("results", []))
+
+@app.get('/movies/popular')
+def get_popular():
+    data = fetch_tmdb("/movie/popular")
+    if not data:
+        return []
+    return format_tmdb_movies(data.get("results", []))
+
+# --- SEARCH ---
+
+@app.get('/movies/search')
+def search_movies(query: str = Query(..., min_length=2)):
+    # Search the local DB for autocomplete so recommendations always work
+    q = query.lower()
+    matches = [t for t in ALL_MOVIE_TITLES if q in t.lower()][:10]
+    return {"results": matches}
+
+# --- CORE FEATURES ---
+
+@app.post('/recomendation')
+def recomendation(input_data: RecomendationInput):
+    # If the movie is not in the local dataset, return 404 so the frontend handles it gracefully
+    if input_data.movie_title not in ALL_MOVIE_TITLES:
+        raise HTTPException(status_code=404, detail="Movie not found in local dataset")
+
+    try:
+        results = recommend(movie_title=input_data.movie_title, engine=input_data.engine, top_k=input_data.top_k)
+        return {"results": results.to_dict(orient='records')}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get('/movie-info/{title}', response_model=MovieInfo)
+def movie_info(title: str):
+    try:
+        # Uses the TMDB API via utils, works for ANY movie
+        movie_id = get_movie_id(title)
+        if not movie_id:
+            raise HTTPException(status_code=404, detail="Movie not found on TMDB")
+        return get_movie_details(movie_id=movie_id)
+    except HTTPException:
+        # Let the 404 above pass through instead of being converted to a 500
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/movie-reviews-sentiment")
+def movie_reviews_sentiment(input_data: MovieReviews):
+    try:
+        movie_id = get_movie_id(movie_title=input_data.title)
+        reviews_data = get_movie_reviews(movie_id=movie_id, max_reviews=input_data.num_reviews)
+
+        if not reviews_data:
+            # Return a specific error for the frontend to handle
+            raise HTTPException(status_code=404, detail="No reviews found")
+
+        review_texts = [r["content"] for r in reviews_data if r.get("content")]
+        sentiment_distribution = analyze_reviews_sentiment(review_texts)
+
+        return {
+            "movie": input_data.title,
+            "total_reviews_analyzed": len(review_texts),
+            "sentiment_distribution": sentiment_distribution
+        }
+    except HTTPException as he:
+        raise he
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get('/TTS/{text}')
+async def generate_tts(text: str):
+    try:
+        if not text.strip():
+            raise HTTPException(status_code=400, detail="Text is empty")
+
+        # The updated utils.TTS now returns a safe TEMP path
+        audio_path = await TTS(text=text)
+
+        if not os.path.exists(audio_path):
+            raise HTTPException(status_code=500, detail="Audio generation failed")
+
+        return FileResponse(audio_path, media_type="audio/mpeg", filename="summary_audio.mp3")
+    except HTTPException:
+        # Preserve the 400/500 status codes raised above
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.post('/summarize-movie')
+def summarize_movie(input_data: WorkflowInput):
+    try:
+        return workflow(title=input_data.title, overview=input_data.overview)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
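A small client-side sketch exercising a few of the endpoints above, assuming the app is served locally (e.g. uvicorn app:app on the default port 8000); titles and responses are illustrative:

# Hedged client sketch against a locally running instance of app.py.
import requests

BASE = "http://127.0.0.1:8000"

print(requests.get(f"{BASE}/").json())  # health check: {'message': 'API is live', ...}
print(requests.get(f"{BASE}/movies/search", params={"query": "toy"}).json())
print(requests.post(f"{BASE}/recomendation",
                    json={"movie_title": "Toy Story", "engine": "hybrid", "top_k": 5}).json())
print(requests.post(f"{BASE}/movie-reviews-sentiment",
                    json={"title": "Inception", "num_reviews": 20}).json())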
artifacts/cleaned_movie.csv ADDED
The diff for this file is too large to render. See raw diff
 
artifacts/goemotions_bilstm_checkpoint.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2061697f00e13a048b56bfd5b8ce721ba5cdd91143ce5a1d4e1e6a272ff7944d
+size 16386991
artifacts/movie_embeddings.npy ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d4b5489815b1c9fc85cce13f25bdc158e74efa1636d1bb7e61bd01a328a14b4
+size 21759104
artifacts/movie_faiss.index ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53103535da1e4cc8cfbc2d3ff7a5beaee18c95a9218300e9c1f9a6bda125a57b
+size 21759021
artifacts/tfidf_matrix.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9a8389e8e7db42e72cc65e7ec30200775f44881ec53325cfdd1a108449970e4
+size 5540299
artifacts/tfidf_vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bac055cb4eacfa2fd32b6c72c248afbfd4062a5772069543cdf3b63d328066c
+size 328902
prediction_helper.py ADDED
@@ -0,0 +1,148 @@
+import numpy as np
+import pandas as pd
+import faiss
+import pickle
+from sentence_transformers import SentenceTransformer
+from sklearn.metrics.pairwise import cosine_similarity
+import warnings
+warnings.filterwarnings("ignore", module="sklearn")
+
+df = pd.read_csv("artifacts/cleaned_movie.csv")
+
+# --------------------- loading models -------------------------
+print("LOADING THE MODELS...")
+
+embeddings = np.load("artifacts/movie_embeddings.npy")
+print("Embeddings shape:", embeddings.shape)
+
+index = faiss.read_index("artifacts/movie_faiss.index")
+print("FAISS index loaded. Total vectors:", index.ntotal)
+
+with open("artifacts/tfidf_vectorizer.pkl", "rb") as f:
+    tfidf_vectorizer = pickle.load(f)
+print("tfidf_vectorizer loaded.")
+
+with open("artifacts/tfidf_matrix.pkl", "rb") as f:
+    tfidf_matrix = pickle.load(f)
+print("tfidf_matrix loaded.")
+
+model = SentenceTransformer("all-MiniLM-L6-v2")
+print("SentenceTransformer loaded.")
+
+print("ALL MODELS LOADED SUCCESSFULLY.")
+
+
+# ------------------------------ loading engines ---------------------------
+def recommend_movies(movie_title, df, model, index, top_k=10):
+    try:
+        if movie_title not in df['title'].values:
+            return f"Movie '{movie_title}' not found in dataset."
+        idx = df[df['title'] == movie_title].index[0]
+        query_text = df.loc[idx, 'tags']
+        query_embedding = model.encode([query_text])
+        query_embedding = query_embedding / np.linalg.norm(query_embedding, axis=1, keepdims=True)
+        scores, indices = index.search(query_embedding, top_k + 1)
+        sim_scores = scores[0][1:]
+        sim_indices = indices[0][1:]
+
+        results = df.iloc[sim_indices].copy()
+        results["embedding_score"] = sim_scores
+
+        return results[['title', 'embedding_score']]
+    except Exception as e:
+        raise Exception(f"Error while recommending movies [embeddings]: {e}")
+
+def recommend_movies_tfidf(movie_title, df, tfidf_matrix, top_k=5):
+    try:
+        if movie_title not in df['title'].values:
+            return f"Movie '{movie_title}' not found in dataset."
+
+        idx = df[df['title'] == movie_title].index[0]
+
+        cosine_sim = cosine_similarity(tfidf_matrix[idx], tfidf_matrix).flatten()
+
+        sim_indices = cosine_sim.argsort()[::-1][1:top_k+1]
+
+        results = df.iloc[sim_indices].copy()
+        results["tfidf_score"] = cosine_sim[sim_indices]
+
+        return results[['title', 'tfidf_score']]
+    except Exception as e:
+        raise Exception(f"Error while recommending movies [tfidf]: {e}")
+
+
+def recommend_movies_hybrid(movie_title, df, model, index, tfidf_matrix, top_k=10, alpha=0.6):
+    """
+    alpha = weight for the embedding score
+    (1 - alpha) = weight for the tf-idf score
+    """
+    try:
+        if movie_title not in df['title'].values:
+            return f"Movie '{movie_title}' not found in dataset."
+
+        idx = df[df['title'] == movie_title].index[0]
+        query_text = df.loc[idx, 'tags']
+
+        # -------- Embedding Search --------
+        query_embedding = model.encode([query_text])
+        query_embedding = query_embedding / np.linalg.norm(query_embedding, axis=1, keepdims=True)
+
+        emb_scores, emb_indices = index.search(query_embedding, 50)
+        emb_scores = emb_scores[0]
+        emb_indices = emb_indices[0]
+
+        emb_df = pd.DataFrame({
+            "index": emb_indices,
+            "embedding_score": emb_scores
+        })
+
+        # -------- TF-IDF Search --------
+        cosine_sim = cosine_similarity(tfidf_matrix[idx], tfidf_matrix).flatten()
+        tfidf_indices = cosine_sim.argsort()[::-1][:50]
+        tfidf_scores = cosine_sim[tfidf_indices]
+
+        tfidf_df = pd.DataFrame({
+            "index": tfidf_indices,
+            "tfidf_score": tfidf_scores
+        })
+
+        # -------- Merge Both --------
+        merged = pd.merge(emb_df, tfidf_df, on="index", how="outer").fillna(0)
+
+        # -------- Normalize Scores --------
+        merged["embedding_score"] = merged["embedding_score"] / merged["embedding_score"].max()
+        merged["tfidf_score"] = merged["tfidf_score"] / merged["tfidf_score"].max()
+
+        # -------- Weighted Fusion --------
+        merged["hybrid_score"] = alpha * merged["embedding_score"] + (1 - alpha) * merged["tfidf_score"]
+
+        # -------- Final Ranking --------
+        merged = merged.sort_values(by="hybrid_score", ascending=False)
+
+        top_indices = merged["index"].head(top_k).values
+        results = df.iloc[top_indices].copy()
+        results["hybrid_score"] = merged["hybrid_score"].head(top_k).values
+
+        return results[['title', 'hybrid_score']]
+    except Exception as e:
+        raise Exception(f"Error while recommending movies [hybrid]: {e}")
+
+def recommend(movie_title, engine="embedding", top_k=5):
+    if engine == "embedding":
+        return recommend_movies(movie_title, df, model, index, top_k)
+
+    elif engine == "tfidf":
+        return recommend_movies_tfidf(movie_title, df, tfidf_matrix, top_k)
+
+    elif engine == "hybrid":
+        return recommend_movies_hybrid(movie_title, df, model, index, tfidf_matrix, top_k)
+
+    else:
+        return "Invalid engine. Choose: 'embedding', 'tfidf', or 'hybrid'."
+
+
+# ----------------------------- testing ---------------------------------------
+"""print(recommend("Toy Story", engine="embedding", top_k=5))
+print(recommend("Toy Story", engine="tfidf", top_k=5))
+print(recommend("Toy Story", engine="hybrid", top_k=5))"""
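The hybrid engine ranks candidates by a weighted fusion of the two normalized scores; a tiny worked example of that fusion with the default alpha=0.6 (the two input scores below are made up for illustration):

# Worked example of the score fusion used in recommend_movies_hybrid.
alpha = 0.6
embedding_score = 0.90  # normalized FAISS similarity (hypothetical value)
tfidf_score = 0.40      # normalized TF-IDF cosine similarity (hypothetical value)

hybrid_score = alpha * embedding_score + (1 - alpha) * tfidf_score
print(hybrid_score)     # 0.6*0.90 + 0.4*0.40 = 0.70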
requirements.txt ADDED
@@ -0,0 +1,14 @@
+pandas
+sentence-transformers
+nltk
+numpy
+faiss-cpu
+scikit-learn
+fastapi
+edge_tts
+langchain
+langgraph
+langchain-community
+langchain-openai
+uvicorn
+langchain-groq
summarise_bot.py ADDED
@@ -0,0 +1,385 @@
+# =========================
+# IMPORTS
+# =========================
+from langgraph.graph import StateGraph, START, END
+from typing import TypedDict
+from langchain_core.messages import HumanMessage
+from langchain_openai import ChatOpenAI
+from langchain_community.tools.tavily_search import TavilySearchResults
+from langchain_core.tools import tool
+import json
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+# =========================
+# TAVILY TOOL
+# =========================
+@tool
+def tavily_search(query: str) -> dict:
+    """
+    Perform a real-time web search using Tavily.
+    """
+    try:
+        search = TavilySearchResults(max_results=2)
+        results = search.run(query)
+        return {"query": query, "results": results}
+    except Exception as e:
+        return {"error": str(e)}
+
+
+# =========================
+# LLM
+# =========================
+llm = ChatOpenAI(
+    model="gpt-4.1-nano",
+    temperature=0.4,
+    streaming=True
+)
+
+
+# =========================
+# STATE
+# =========================
+class MovieState(TypedDict, total=False):
+    title: str
+    overview: str
+    web_context: str
+    key_plot_points: str
+    iconic_moments: str
+    themes: str
+    interesting_facts: str
+    songs: str
+    trailer: str
+    summary: str
+
+
+# =========================
+# NODE: FETCH WEB CONTEXT
+# =========================
+def fetch_web_context(state: MovieState):
+    title = state["title"]
+
+    query = f"""
+    Find reliable and up-to-date information about the movie "{title}".
+
+    Focus on:
+    - Official trailers (studio or verified YouTube channels)
+    - Soundtrack / songs (Spotify, Apple Music, IMDb soundtrack)
+    - Verified trivia or interesting facts
+    - Release details and reception (optional)
+
+    Prefer sources like:
+    - IMDb
+    - Wikipedia
+    - Official studio websites
+    - Verified YouTube channels
+    - Major entertainment publications
+
+    Avoid:
+    - Fan theories
+    - Reviews without factual info
+    - Opinion-heavy blogs
+    """
+
+    web = tavily_search.run(query)
+
+    return {
+        "web_context": str(web)
+    }
+
+# =========================
+# HELPER PROMPT RUNNER
+# =========================
+def run_llm(prompt: str) -> str:
+    return llm.invoke(prompt).content
+
+
+# =========================
+# ANALYSIS NODES
+# =========================
+def find_key_points(state: MovieState):
+    prompt = f"""
+    You are a professional movie analyst.
+
+    Movie title: {state['title']}
+
+    Overview:
+    {state['overview']}
+
+    Verified web context (may include reviews, trivia, or plot confirmations):
+    {state['web_context']}
+
+    Task:
+    Extract the MOST IMPORTANT plot points that define the story.
+
+    Guidelines:
+    - Focus on STORY EVENTS, not themes or opinions
+    - Keep it chronological
+    - Avoid unnecessary details or long explanations
+    - Do NOT invent scenes not supported by the overview or web context
+
+    Output format (strict):
+    - Bullet list
+    - 5–7 plot points max
+    - Each point: 1 concise sentence
+    """
+    return {"key_plot_points": run_llm(prompt)}
+
+
+def find_iconic_moments(state: MovieState):
+    prompt = f"""
+    You are a film analyst identifying ICONIC moments.
+
+    Movie title: {state['title']}
+
+    Overview:
+    {state['overview']}
+
+    Verified web context (reviews, trivia, cultural references):
+    {state['web_context']}
+
+    Task:
+    Identify the most ICONIC moments from the movie.
+
+    Definition of iconic:
+    - Scenes that audiences remember most
+    - Moments often referenced in reviews, memes, or pop culture
+    - Visually, emotionally, or narratively standout scenes
+
+    Guidelines:
+    - Do NOT summarize the full plot
+    - Avoid repeating basic plot points
+    - Focus on memorable SCENES or MOMENTS
+    - Base choices on common recognition (not personal opinion)
+
+    Output format (strict):
+    - Numbered list
+    - 4–6 iconic moments
+    - Each item:
+      • Scene title (short)
+      • One-sentence explanation of why it’s iconic
+    """
+    return {"iconic_moments": run_llm(prompt)}
+
+def find_themes(state: MovieState):
+    prompt = f"""
+    You are a movie analyst focusing on THEMES.
+
+    Movie title: {state['title']}
+
+    Overview:
+    {state['overview']}
+
+    Verified web context (critical analysis, reviews, commentary):
+    {state['web_context']}
+
+    Task:
+    Identify the CORE THEMES explored in the movie.
+
+    Guidelines:
+    - Themes should be CONCEPTS (not plot points or morals)
+    - Avoid vague words like "life" or "journey" unless specific
+    - Base themes on story events and critical interpretation
+    - Do NOT over-explain
+
+    Output format (strict):
+    - Bullet list
+    - 3–5 themes only
+    - Each theme format:
+      **Theme name** – one concise explanatory sentence
+    """
+    return {"themes": run_llm(prompt)}
+
+def find_interesting_facts(state: MovieState):
+    prompt = f"""
+    You are a movie researcher collecting VERIFIED trivia.
+
+    Movie title: {state['title']}
+
+    Overview:
+    {state['overview']}
+
+    Verified web context (interviews, trivia, production notes, reviews):
+    {state['web_context']}
+
+    Task:
+    Extract interesting and lesser-known facts about the movie.
+
+    Guidelines:
+    - Facts must be BASED on the web context or widely known sources
+    - Avoid speculation or unverified claims
+    - Focus on production, casting, behind-the-scenes, or reception
+    - Do NOT repeat plot points
+
+    Output format (strict):
+    - Bullet list
+    - 4–6 facts
+    - Each fact:
+      • One concise sentence
+      • Clearly factual (no opinions)
+    """
+    return {"interesting_facts": run_llm(prompt)}
+
+def find_songs(state: MovieState):
+    prompt = f"""
+    You are extracting OFFICIAL soundtrack information.
+
+    Movie title: {state['title']}
+
+    Verified web context (soundtrack listings, music platforms, official sources):
+    {state['web_context']}
+
+    Task:
+    Identify the official soundtrack songs associated with this movie.
+
+    Rules:
+    - Include ONLY officially released songs (not background score unless famous)
+    - Prefer reliable sources (Spotify, YouTube, Apple Music, IMDb soundtrack)
+    - Do NOT guess or invent songs
+    - Do NOT add explanations or extra text
+
+    Output format (STRICT — follow exactly):
+    - One song per line
+    - Each line format:
+      [song name, official link]
+
+    If no reliable song information is found:
+    - Return an empty list: []
+    """
+    return {"songs": run_llm(prompt)}
+
+
+def find_trailer(state: MovieState):
+    prompt = f"""
+    You are retrieving OFFICIAL movie trailer information.
+
+    Movie title: {state['title']}
+
+    Verified web context (official YouTube channels, studio pages, IMDb, Wikipedia):
+    {state['web_context']}
+
+    Task:
+    Find official trailer links for this movie.
+
+    Rules:
+    - ONLY official trailers (no fan edits, reactions, reviews)
+    - Prefer studio or verified YouTube channels
+    - Do NOT invent or approximate links
+    - Do NOT include commentary or descriptions
+
+    Output format (STRICT — follow exactly):
+    - One trailer per line
+    - Each line format:
+      [trailer name, official link]
+
+    If no official trailer is found:
+    - Return an empty list: []
+    """
+    return {"trailer": run_llm(prompt)}
+
+
+# =========================
+# FINAL SUMMARY
+# =========================
+def generate_summary(state: MovieState):
+    prompt = f"""
+    You are generating a FINAL movie summary for a frontend application.
+
+    Movie title: {state['title']}
+
+    Use ONLY the information provided below.
+    Do NOT add new facts.
+    Do NOT use markdown.
+    Do NOT include extra text.
+
+    INPUT DATA
+    ---------
+
+    KEY PLOT POINTS:
+    {state['key_plot_points']}
+
+    ICONIC MOMENTS:
+    {state['iconic_moments']}
+
+    THEMES:
+    {state['themes']}
+
+    INTERESTING FACTS:
+    {state['interesting_facts']}
+
+    SONGS:
+    {state['songs']}
+
+    TRAILERS:
+    {state['trailer']}
+
+    ---------
+
+    TASK:
+    Return a VALID JSON object that follows this schema EXACTLY.
+
+    JSON SCHEMA (STRICT):
+    {{
+      "overview": "2–3 sentence high-level movie overview",
+      "key_moments": ["moment 1", "moment 2", "moment 3"],
+      "themes": ["theme 1", "theme 2"],
+      "notable_facts": ["fact 1", "fact 2"],
+      "soundtrack_highlights": ["song name 1", "song name 2"],
+      "official_trailer": "trailer name"
+    }}
+    """
+    return {"summary": run_llm(prompt)}
+
+
+# =========================
+# GRAPH
+# =========================
+graph = StateGraph(MovieState)
+
+graph.add_node("fetch_web_context", fetch_web_context)
+graph.add_node("find_key_points", find_key_points)
+graph.add_node("find_iconic_moments", find_iconic_moments)
+graph.add_node("find_themes", find_themes)
+graph.add_node("find_interesting_facts", find_interesting_facts)
+graph.add_node("find_songs", find_songs)
+graph.add_node("find_trailer", find_trailer)
+graph.add_node("generate_summary", generate_summary)
+
+graph.add_edge(START, "fetch_web_context")
+
+graph.add_edge("fetch_web_context", "find_key_points")
+graph.add_edge("fetch_web_context", "find_iconic_moments")
+graph.add_edge("fetch_web_context", "find_themes")
+graph.add_edge("fetch_web_context", "find_interesting_facts")
+graph.add_edge("fetch_web_context", "find_songs")
+graph.add_edge("fetch_web_context", "find_trailer")
+
+graph.add_edge("find_key_points", "generate_summary")
+graph.add_edge("find_iconic_moments", "generate_summary")
+graph.add_edge("find_themes", "generate_summary")
+graph.add_edge("find_interesting_facts", "generate_summary")
+graph.add_edge("find_songs", "generate_summary")
+graph.add_edge("find_trailer", "generate_summary")
+
+graph.add_edge("generate_summary", END)
+
+workflow = graph.compile()
+
+def summarise_movie(title: str, overview: str):
+    result = workflow.invoke({
+        "title": title,
+        "overview": overview
+    })
+
+    raw_summary = result["summary"]
+
+    try:
+        return json.loads(raw_summary)
+    except json.JSONDecodeError:
+        raise ValueError("LLM returned invalid JSON")
+
+
+#print(summarise_movie("Jumanji", "Four teenagers are sucked into a magical video game..."))
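A minimal usage sketch of the compiled workflow, assuming OPENAI_API_KEY and TAVILY_API_KEY are available via the .env file loaded above; the returned dict follows the JSON schema defined in generate_summary:

# Minimal usage sketch (requires OPENAI_API_KEY and TAVILY_API_KEY in the environment).
from summarise_bot import summarise_movie

summary = summarise_movie(
    title="Jumanji",
    overview="Four teenagers are sucked into a magical video game...",
)
print(summary["overview"])
print(summary["key_moments"])  # other keys: themes, notable_facts, soundtrack_highlights, official_trailer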
utils.py ADDED
@@ -0,0 +1,98 @@
+from dotenv import load_dotenv
+import os
+import edge_tts
+import tempfile
+from uuid import uuid4
+import requests
+load_dotenv()
+
+
+API_KEY = os.getenv("TMDB_API_KEY")
+BASE_URL = "https://api.themoviedb.org/3"
+IMAGE_BASE = "https://image.tmdb.org/t/p/w500"
+
+# --------------------------------- get movie id ----------------------------
+def get_movie_id(movie_title):
+    url = f"{BASE_URL}/search/movie"
+    params = {
+        "api_key": API_KEY,
+        "query": movie_title
+    }
+
+    r = requests.get(url, params=params)
+    data = r.json()
+
+    if "results" in data and len(data["results"]) > 0:
+        return data["results"][0]["id"]
+    return None
+
+# --------------------------------- get movie reviews ----------------------------
+def get_movie_reviews(movie_id, max_reviews=100):
+    url = f"{BASE_URL}/movie/{movie_id}/reviews"
+    params = {
+        "api_key": API_KEY,
+        "language": "en-US"
+    }
+
+    response = requests.get(url, params=params)
+
+    if response.status_code != 200:
+        print("TMDB Error:", response.status_code, response.text)
+        return []
+
+    data = response.json()
+
+    reviews = []
+
+    for review in data.get("results", [])[:max_reviews]:
+        reviews.append({
+            "author": review.get("author"),
+            "content": review.get("content"),
+            "rating": review.get("author_details", {}).get("rating"),
+            "created_at": review.get("created_at")
+        })
+
+    return reviews
+
+# --------------------------------- get full movie details ----------------------------
+def get_movie_details(movie_id):
+    url = f"{BASE_URL}/movie/{movie_id}"
+    params = {"api_key": API_KEY}
+
+    r = requests.get(url, params=params)
+    data = r.json()
+
+    details = {
+        "title": data.get("title"),
+        "overview": data.get("overview"),
+        "release_date": data.get("release_date"),
+        "runtime": data.get("runtime"),
+        "rating": data.get("vote_average"),
+        "vote_count": data.get("vote_count"),
+        "genres": [g["name"] for g in data.get("genres", [])],
+        "poster": IMAGE_BASE + data["poster_path"] if data.get("poster_path") else None,
+        "backdrop": IMAGE_BASE + data["backdrop_path"] if data.get("backdrop_path") else None
+    }
+
+    return details
+
+# ----------------------------------------- TTS ----------------------------------------------
+async def TTS(text: str) -> str:
+    """
+    Saves audio to a SYSTEM TEMP folder so VS Code doesn't refresh.
+    """
+    if not text:
+        return ""
+
+    # 1. Create a temp file path (e.g., C:\Users\AppData\Local\Temp\tmp123.mp3)
+    #    delete=False ensures the file stays so we can send it to the frontend
+    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
+    temp_path = temp_file.name
+    temp_file.close()  # Close the handle so the TTS engine can write to it
+
+    # 2. Generate Audio
+    #    Assuming you are using edge-tts; if using gTTS, adjust accordingly
+    communicate = edge_tts.Communicate(text, "en-US-AriaNeural")
+    await communicate.save(temp_path)
+
+    return temp_path
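Outside of FastAPI, the async TTS helper can be exercised with a small asyncio sketch (the spoken text below is just an example):

# Small sketch for running the async TTS helper on its own.
import asyncio
from utils import TTS

async def main():
    path = await TTS("This movie summary was generated automatically.")
    print("MP3 written to:", path)  # temp-file path returned by TTS

asyncio.run(main())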