Spaces:

yigitcanozdemir
/

CineSearch-Demo-Backend

Running

App Files Files Community

yigitcanozdemir committed on Jul 19

Commit

7453e74

1 Parent(s): 6c446ba

Added country-wise filtering and dynamic query prompt_title generation

Browse files

Files changed (6) hide show

components/filters.py +75 -61
components/gradio_ui.py +19 -19
components/similarity.py +2 -2
config.py +182 -9
models/pydantic_schemas.py +12 -6
models/recommendation_engine.py +68 -53

components/filters.py CHANGED Viewed

@@ -4,13 +4,13 @@ from typing import List, Optional
 import re
 from config import QUALITY_LEVELS
 class MovieFilter:
     def __init__(self):
         pass
     def apply_filters(self, data: pd.DataFrame, features: Features) -> pd.DataFrame:
         filtered_data = data.copy()
         if features.movie_or_series != "both":
             filtered_data = self._filter_by_type(
@@ -20,13 +20,11 @@ class MovieFilter:
         if features.genres or features.negative_genres:
             filtered_data["genreScore"] = filtered_data["genres"].apply(
                 lambda g: self.calculate_genre_score(
-                    g,
-                    features.genres or [],
-                    features.negative_genres or []
                 )
             )
         else:
             filtered_data["genreScore"] = 0.0
         if features.date_range:
@@ -35,7 +33,9 @@ class MovieFilter:
             )
         if features.quality_level:
-            filtered_data = self._filter_by_quality(filtered_data, features.quality_level)
         if (
             features.min_runtime_minutes is not None
@@ -46,7 +46,10 @@ class MovieFilter:
                 features.min_runtime_minutes,
                 features.max_runtime_minutes,
             )
         return filtered_data
     def _filter_by_runtime(
@@ -78,66 +81,77 @@ class MovieFilter:
             all_types = ["movie", "tvSeries", "tvMiniSeries", "tvMovie", "video"]
             return data[data["titleType"].isin(all_types)]
-    def calculate_genre_score(self, row_genres: str,
-                                target_genres: List[str],
-                                negative_genres: List[str]) -> float:
-            if not row_genres or pd.isna(row_genres):
-                return 0.0
-            try:
-                row_genre_list = [g.strip().lower() for g in row_genres.split(",")]
-                target_genre_list = [g.lower() for g in target_genres]
-                negative_genre_list = [g.lower() for g in negative_genres]
-                positive_matches = sum(1 for g in row_genre_list if g in target_genre_list)
-                negative_matches = sum(1 for g in row_genre_list if g in negative_genre_list)
-                score = 0.0
-                if target_genres:
-                    score = positive_matches / len(target_genre_list)
-                elif positive_matches > 0:
-                    score = 1.0
-                score -= (negative_matches * 0.5)
-                return max(0.0, score)
-            except (AttributeError, TypeError):
-                return 0.0
-    def _filter_by_genres(self, data: pd.DataFrame, genres: List[str]) -> pd.DataFrame:
-        if not genres:
-            return data
-        def count_genre_matches(row_genres, target_genres):
-            if pd.isna(row_genres):
-                return 0
             row_genre_list = [g.strip().lower() for g in row_genres.split(",")]
             target_genre_list = [g.lower() for g in target_genres]
-            matches = sum(
-                1
-                for target_genre in target_genre_list
-                if any(target_genre in row_genre for row_genre in row_genre_list)
             )
-            return matches
-        data_with_matches = data.copy()
-        data_with_matches["genre_matches"] = data_with_matches["genres"].apply(
-            lambda g: count_genre_matches(g, genres)
-        )
-        filtered_2plus = data_with_matches[data_with_matches["genre_matches"] >= 2]
-        if len(filtered_2plus) >= 20:
-            print(f"Using 2+ genre matches: {len(filtered_2plus)} results")
-            return filtered_2plus.drop("genre_matches", axis=1)
-        filtered_1plus = data_with_matches[data_with_matches["genre_matches"] >= 1]
-        print(f"Using 1+ genre matches: {len(filtered_1plus)} results")
-        return filtered_1plus.drop("genre_matches", axis=1)
     def _filter_by_date_range(
         self, data: pd.DataFrame, date_range: List[int]
@@ -158,13 +172,13 @@ class MovieFilter:
         if config:
             condition = pd.Series(True, index=data.index)
             if "min_rating" in config:
-                condition &= (data["averageRating"] >= config["min_rating"])
             if "max_rating" in config:
-                condition &= (data["averageRating"] <= config["max_rating"])
             if "min_votes" in config:
-                condition &= (data["numVotes"] >= config["min_votes"])
             if "max_votes" in config:
-                condition &= (data["numVotes"] <= config["max_votes"])
             return data[condition]
         return data

 import re
 from config import QUALITY_LEVELS
 class MovieFilter:
     def __init__(self):
         pass
     def apply_filters(self, data: pd.DataFrame, features: Features) -> pd.DataFrame:
         filtered_data = data.copy()
         if features.movie_or_series != "both":
             filtered_data = self._filter_by_type(
         if features.genres or features.negative_genres:
             filtered_data["genreScore"] = filtered_data["genres"].apply(
                 lambda g: self.calculate_genre_score(
+                    g, features.genres or [], features.negative_genres or []
                 )
             )
         else:
             filtered_data["genreScore"] = 0.0
         if features.date_range:
             )
         if features.quality_level:
+            filtered_data = self._filter_by_quality(
+                filtered_data, features.quality_level
+            )
         if (
             features.min_runtime_minutes is not None
                 features.min_runtime_minutes,
                 features.max_runtime_minutes,
             )
+        if features.country_of_origin or features.dont_wanted_countrys:
+            filtered_data = self._filter_by_country_of_origin(
+                filtered_data, features.country_of_origin, features.dont_wanted_countrys
+            )
         return filtered_data
     def _filter_by_runtime(
             all_types = ["movie", "tvSeries", "tvMiniSeries", "tvMovie", "video"]
             return data[data["titleType"].isin(all_types)]
+    def calculate_genre_score(
+        self, row_genres: str, target_genres: List[str], negative_genres: List[str]
+    ) -> float:
+        if not row_genres or pd.isna(row_genres):
+            return 0.0
+        try:
             row_genre_list = [g.strip().lower() for g in row_genres.split(",")]
             target_genre_list = [g.lower() for g in target_genres]
+            negative_genre_list = [g.lower() for g in negative_genres]
+            positive_matches = sum(1 for g in row_genre_list if g in target_genre_list)
+            negative_matches = sum(
+                1 for g in row_genre_list if g in negative_genre_list
             )
+            score = 0.0
+            if target_genres:
+                score = positive_matches / len(target_genre_list)
+            elif positive_matches > 0:
+                score = 1.0
+            score -= negative_matches * 0.5
+            return score
+        except (AttributeError, TypeError):
+            return 0.0
+    def _filter_by_country_of_origin(
+        self,
+        data: pd.DataFrame,
+        country_of_origin: List[str],
+        dont_wanted_countrys: List[str] = None,
+    ) -> pd.DataFrame:
+        if not country_of_origin and not dont_wanted_countrys:
+            return data
+        data_with_country = data.dropna(subset=["country_of_origin"])
+        def country_matches(row_countries: str) -> bool:
+            if not row_countries or pd.isna(row_countries):
+                return False
+            try:
+                row_country_list = [
+                    country.strip() for country in row_countries.split(",")
+                ]
+                if dont_wanted_countrys:
+                    has_unwanted = any(
+                        unwanted_country == row_country
+                        for unwanted_country in dont_wanted_countrys
+                        for row_country in row_country_list
+                    )
+                    if has_unwanted:
+                        return False
+                if country_of_origin:
+                    return any(
+                        target_country == row_country
+                        for target_country in country_of_origin
+                        for row_country in row_country_list
+                    )
+                return True
+            except (AttributeError, TypeError):
+                return False
+        mask = data_with_country["country_of_origin"].apply(country_matches)
+        return data_with_country[mask]
     def _filter_by_date_range(
         self, data: pd.DataFrame, date_range: List[int]
         if config:
             condition = pd.Series(True, index=data.index)
             if "min_rating" in config:
+                condition &= data["averageRating"] >= config["min_rating"]
             if "max_rating" in config:
+                condition &= data["averageRating"] <= config["max_rating"]
             if "min_votes" in config:
+                condition &= data["numVotes"] >= config["min_votes"]
             if "max_votes" in config:
+                condition &= data["numVotes"] <= config["max_votes"]
             return data[condition]
         return data

components/gradio_ui.py CHANGED Viewed

@@ -15,25 +15,26 @@ def get_recommendations_api(message, engine):
         df = result[1] if isinstance(result, tuple) and len(result) > 1 else None
         if df is None or df.empty:
             return []
-        imdb_ids = df["ImdbId"].tolist()
         recommendations = []
         for idx, (_, row) in enumerate(df.iterrows()):
             recommendations.append(
                 {
-                    "imdb_id": row["ImdbId"],
-                    "title": row["Title"],
-                    "year": row["Year"],
-                    "type": row["Type"],
-                    "rating": row["Rating"],
-                    "runtime_minutes": row["RuntimeMinutes"],
-                    "votes": row["Votes"],
-                    "genres": row["Genres"],
-                    "similarity": row["Similarity"],
-                    "hybrid_score": row["Hybrid Score"],
-                    "overview": row["Overview"],
-                    "poster_url": row["Poster Url"],
-                    "final_score": row["Final Score"],
-                    "genre_score": row["Genre Score"],
                 }
             )
@@ -51,7 +52,7 @@ def get_recommendations_api(message, engine):
         titles = result_df["title"].tolist()
         print(titles)
         print(result_df)
-        return recommendations
     except Exception as e:
         print(f"Error getting recommendations: {e}")
         return []
@@ -63,10 +64,9 @@ def create_interface(engine):
     iface = gr.Interface(
         fn=predict_wrapper,
-        inputs=gr.Textbox(lines=1, placeholder="Type your movie query..."),
         outputs=gr.JSON(label="Recommendations"),
-        title="Movie Recommendation API",
-        description="Type a movie or genre, get recommendations with posters.",
         api_name="predict",
     )
     return iface

         df = result[1] if isinstance(result, tuple) and len(result) > 1 else None
         if df is None or df.empty:
             return []
+        prompt_title = result[0]
         recommendations = []
         for idx, (_, row) in enumerate(df.iterrows()):
             recommendations.append(
                 {
+                    "imdb_id": row["tconst"],
+                    "title": row["title"],
+                    "year": row["year"],
+                    "type": row["type"],
+                    "rating": row["rating"],
+                    "runtime_minutes": row["runtimeMinutes"],
+                    "votes": row["votes"],
+                    "genres": row["genres"],
+                    "similarity": row["similarity_score"],
+                    "hybrid_score": row["hybrid_score"],
+                    "overview": row["overview"],
+                    "poster_url": row["poster_url"],
+                    "final_score": row["final_score"],
+                    "genre_score": row["genre_score"],
+                    "country_of_origin": row["country_of_origin"],
                 }
             )
         titles = result_df["title"].tolist()
         print(titles)
         print(result_df)
+        return {"recommendations": recommendations, "prompt_title": prompt_title}
     except Exception as e:
         print(f"Error getting recommendations: {e}")
         return []
     iface = gr.Interface(
         fn=predict_wrapper,
+        inputs=gr.Textbox(lines=1, placeholder="Type your query..."),
         outputs=gr.JSON(label="Recommendations"),
+        title="Recommendation API",
         api_name="predict",
     )
     return iface

components/similarity.py CHANGED Viewed

@@ -97,7 +97,7 @@ class SimilarityCalculator:
             filtered_data,
             similarity_weight=1,
             rating_weight=rating_weight,
-            genre_weight=0.2,
         )
         top_indices = (
@@ -105,7 +105,6 @@ class SimilarityCalculator:
             .indices.cpu()
             .numpy()
         )
         results = []
         for idx in top_indices:
             original_idx = filtered_data.iloc[idx].name
@@ -126,6 +125,7 @@ class SimilarityCalculator:
                 "final_score": row["finalScore"],
                 "genre_score": row["genreScore"],
                 "poster_url": row["poster_url"],
             }
             results.append(result)

             filtered_data,
             similarity_weight=1,
             rating_weight=rating_weight,
+            genre_weight=0.3,
         )
         top_indices = (
             .indices.cpu()
             .numpy()
         )
         results = []
         for idx in top_indices:
             original_idx = filtered_data.iloc[idx].name
                 "final_score": row["finalScore"],
                 "genre_score": row["genreScore"],
                 "poster_url": row["poster_url"],
+                "country_of_origin": row["country_of_origin"],
             }
             results.append(result)

config.py CHANGED Viewed

@@ -35,15 +35,188 @@ GENRE_LIST = Literal[
     "Adult",
     "Reality-TV",
 ]
 QUALITY_LEVELS = {
-        "legendary": {"min_rating": 8.5, "min_votes": 100000,"rating_weight":0.3},
-        "classic": {"min_rating": 7.5, "min_votes": 50000,"rating_weight":0.25},
-        "popular": {"min_rating": 6.5, "min_votes": 10000,"rating_weight":0.2},
-        "niche": {"min_rating": 7.0, "max_votes": 50000,"rating_weight":-0.1},
-        "cult": {"min_rating": 6.0, "max_votes": 25000,"rating_weight":-0.15},
-        "mainstream": {"min_rating": 5.5, "min_votes": 10000,"rating_weight":0.2},
-        "any": {"rating_weight": 0.1}
-    }
 class Config:
     OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
@@ -54,4 +227,4 @@ class Config:
     DATA_FILE = "data/demo_data.parquet"
     THEME = "soft"
-    TITLE = "🎬 AI Movie & TV Series Recommender"

     "Adult",
     "Reality-TV",
 ]
+COUNTRY_LIST = Literal[
+    "Italy",
+    "France",
+    "Sweden",
+    "Germany",
+    "United States",
+    "Denmark",
+    "Soviet Union",
+    "United Kingdom",
+    "Australia",
+    "Austria",
+    "Switzerland",
+    "Japan",
+    "Canada",
+    "Isle of Man",
+    "Hungary",
+    "Brazil",
+    "Czechoslovakia",
+    "Portugal",
+    "Mexico",
+    "Norway",
+    "India",
+    "West Germany",
+    "Yugoslavia",
+    "Spain",
+    "Egypt",
+    "Finland",
+    "Albania",
+    "Poland",
+    "Greece",
+    "Hong Kong",
+    "East Germany",
+    "Venezuela",
+    "Ireland",
+    "Jamaica",
+    "Monaco",
+    "Turkey",
+    "Bulgaria",
+    "Romania",
+    "Israel",
+    "Cuba",
+    "Algeria",
+    "Bahamas",
+    "China",
+    "Taiwan",
+    "South Africa",
+    "Senegal",
+    "Belgium",
+    "Bermuda",
+    "Morocco",
+    "Argentina",
+    "Netherlands",
+    "Croatia",
+    "Chile",
+    "Iran",
+    "Estonia",
+    "Luxembourg",
+    "Peru",
+    "Colombia",
+    "Bangladesh",
+    "Thailand",
+    "Philippines",
+    "Lebanon",
+    "Libya",
+    "Kuwait",
+    "Côte d'Ivoire",
+    "Iceland",
+    "South Korea",
+    "Fiji",
+    "Botswana",
+    "New Zealand",
+    "Greenland",
+    "Martinique",
+    "Netherlands Antilles",
+    "Tunisia",
+    "Indonesia",
+    "Zimbabwe",
+    "Kenya",
+    "Mali",
+    "Burkina Faso",
+    "Cameroon",
+    "Ghana",
+    "North Korea",
+    "Macao",
+    "Jordan",
+    "Antarctica",
+    "Vietnam",
+    "Russia",
+    "Federal Republic of Yugoslavia",
+    "Uruguay",
+    "Malaysia",
+    "Armenia",
+    "Czech Republic",
+    "Liechtenstein",
+    "Georgia",
+    "North Macedonia",
+    "Bosnia and Herzegovina",
+    "Slovakia",
+    "Kazakhstan",
+    "Slovenia",
+    "Singapore",
+    "Cambodia",
+    "Aruba",
+    "Tajikistan",
+    "Latvia",
+    "Uzbekistan",
+    "Malta",
+    "Ukraine",
+    "Pakistan",
+    "Bhutan",
+    "Belarus",
+    "Cyprus",
+    "Nepal",
+    "Haiti",
+    "Lithuania",
+    "United Arab Emirates",
+    "Occupied Palestinian Territory",
+    "Serbia",
+    "Serbia and Montenegro",
+    "Afghanistan",
+    "Mongolia",
+    "Ecuador",
+    "Puerto Rico",
+    "Rwanda",
+    "Vatican",
+    "Guatemala",
+    "Iraq",
+    "Paraguay",
+    "Bahrain",
+    "Saudi Arabia",
+    "Qatar",
+    "Cayman Islands",
+    "Sudan",
+    "Dominican Republic",
+    "Sri Lanka",
+    "Liberia",
+    "Lesotho",
+    "Bolivia",
+    "Faroe Islands",
+    "Azerbaijan",
+    "New Caledonia",
+    "Costa Rica",
+    "Nigeria",
+    "Kosovo",
+    "French Polynesia",
+    "Syria",
+    "Papua New Guinea",
+    "Gambia",
+    "Chad",
+    "Panama",
+    "Moldova",
+    "Uganda",
+    "Montenegro",
+    "Laos",
+    "Mauritius",
+    "Ethiopia",
+    "Kyrgyzstan",
+    "Namibia",
+    "Benin",
+    "Mauritania",
+    "The Democratic Republic of Congo",
+    "Vanuatu",
+    "Myanmar",
+    "Tanzania",
+    "Marshall Islands",
+    "Zambia",
+    "Guadeloupe",
+    "Malawi",
+    "Yemen",
+]
 QUALITY_LEVELS = {
+    "legendary": {"min_rating": 8.0, "min_votes": 100000, "rating_weight": 0.2},
+    "classic": {"min_rating": 7.5, "min_votes": 50000, "rating_weight": 0.15},
+    "popular": {"min_rating": 6.5, "min_votes": 10000, "rating_weight": 0.15},
+    "niche": {"min_rating": 7.0, "max_votes": 50000, "rating_weight": -0.1},
+    "cult": {"min_rating": 6.0, "max_votes": 25000, "rating_weight": -0.15},
+    "mainstream": {"min_rating": 5.5, "min_votes": 10000, "rating_weight": 0.2},
+    "any": {"rating_weight": 0.1},
+}
 class Config:
     OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
     DATA_FILE = "data/demo_data.parquet"
     THEME = "soft"
+    TITLE = "🎬 AI Movie & TV Series Recommender"

models/pydantic_schemas.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from pydantic import BaseModel, Field
 from typing import Literal, Optional
-from config import GENRE_LIST
-from config import QUALITY_LEVELS
 class Features(BaseModel):
     movie_or_series: Literal["movie", "tvSeries", "both"] = Field(
@@ -15,11 +15,10 @@ class Features(BaseModel):
     )
     quality_level: str = Field(
         default="any",
-        description="Quality expectation: legendary, classic, popular, niche, cult, mainstream, any"
     )
-    positive_themes: str = Field(
-        default="When Earth becomes uninhabitable in the future, a farmer and ex-NASA pilot, Joseph Cooper, is tasked to pilot a spacecraft, along with a team of researchers, to find a new planet for humans.",
-        description="Themes that should be present in the results"
     )
     negative_themes: Optional[str] = Field(
         description="Themes that should be avoided in the results"
@@ -33,3 +32,10 @@ class Features(BaseModel):
     max_runtime_minutes: Optional[int] = Field(
         description="Preferred maximum runtimes as minutes", default=None
     )

 from pydantic import BaseModel, Field
 from typing import Literal, Optional
+from config import GENRE_LIST, COUNTRY_LIST
 class Features(BaseModel):
     movie_or_series: Literal["movie", "tvSeries", "both"] = Field(
     )
     quality_level: str = Field(
         default="any",
+        description="Quality expectation: legendary, classic, popular, niche, cult, mainstream, any",
     )
+    positive_themes: Optional[str] = Field(
+        description="Themes that should be present in the results",
     )
     negative_themes: Optional[str] = Field(
         description="Themes that should be avoided in the results"
     max_runtime_minutes: Optional[int] = Field(
         description="Preferred maximum runtimes as minutes", default=None
     )
+    country_of_origin: list[COUNTRY_LIST] = Field(
+        description="Preferred country of production"
+    )
+    dont_wanted_countrys: list[COUNTRY_LIST] = Field(
+        description="Unwanted country of production"
+    )
+    prompt_title: str = Field(description="A short and meaningful title for the prompt")

models/recommendation_engine.py CHANGED Viewed

@@ -72,11 +72,6 @@ class RecommendationEngine:
                     raise similarity_error
             print(f"🔍 Found {len(search_results['results'])} results.")
-            print("📋 Formatting results...")
-            start_time = time.time()
-            formatted_results = self._format_results(search_results)
-            format_time = time.time() - start_time
-            print(f"✅ Results formatted in {format_time:.4f} seconds")
             print("📊 Creating results dataframe...")
             start_time = time.time()
@@ -85,7 +80,7 @@ class RecommendationEngine:
             print(f"✅ Dataframe created in {df_time:.4f} seconds")
             print("🎉 Recommendation process completed successfully!")
-            return formatted_results, results_df
         except Exception as e:
             print(f"❌ Critical error in recommendation process: {str(e)}")
@@ -120,7 +115,7 @@ class RecommendationEngine:
                                     ---
                                     ### GENRES
-                                    - If the user mentions a specific movie/show, extract its ACTUAL genres (e.g., IMDb/TMDB genres).
                                     - If unsure, infer 1–2 of the most likely/popular genres.
                                     - If user directly mentions genres, match exactly from the allowed genre list.
                                     - Prefer accuracy over guessing; leave empty if absolutely no genre can be inferred.
@@ -211,9 +206,10 @@ class RecommendationEngine:
                                     ✅ GOOD THEMES EXAMPLES:
                                     - “In 1970s New York, a Mafia don must navigate betrayal and FBI pressure to hold his criminal empire together.”
                                     - “A Mexican drug lord rises to power as DEA agents close in on his cross-border empire.”
-                                    - "In an alternative version of 1969, the Soviet Union beats the United States to the Moon, and the space race continues on for decades with still grander challenges and goals."
-                                    - "When Earth becomes uninhabitable in the future, a farmer and ex-NASA pilot, Joseph Cooper, is tasked to pilot a spacecraft, along with a team of researchers, to find a new planet for humans"
-                                    - "An astronaut becomes stranded on Mars after his team assume him dead, and must rely on his ingenuity to find a way to signal to Earth that he is alive and can survive until a potential rescue."
                                     ❌ BAD THEMES TO AVOID:
                                     - “A powerful family faces betrayal as they try to protect their empire.” ⟶ Too vague and franchise-prone
@@ -255,11 +251,57 @@ class RecommendationEngine:
                                     - Defaults to `[1900, 2025]` if not constrained.
                                     - "recent", "modern" → prefer `[2010, 2025]`
                                     - "classic", "old" → prefer `[1950, 1995]`
                                     ---
                                     ### LANGUAGE
                                     If the query is not in English, **translate to English first**, then apply the above rules.
                                     ---
@@ -290,34 +332,6 @@ class RecommendationEngine:
                 production_region=[],
             )
-    def _format_results(self, search_results: dict) -> str:
-        if not search_results["results"]:
-            return search_results["status"]
-        output = []
-        output.append(f"🎬 {search_results['status']}")
-        output.append(
-            f"🔍 Search completed in {search_results['search_time']:.4f} seconds"
-        )
-        output.append(
-            f"📊 Found {len(search_results['results'])} results from {search_results['total_candidates']} candidates"
-        )
-        output.append("=" * 50)
-        for i, result in enumerate(search_results["results"], 1):
-            output.append(f"{i}. **{result['title']}** ({result['year']})")
-            output.append(f"   📝 Type: {result['type'].title()}")
-            output.append(
-                f"   ⭐ Rating: {result['rating']}/10 ({result['votes']:,} votes)"
-            )
-            output.append(f"   🎭 Genres: {result['genres']}")
-            output.append(f"   📊 Similarity: {result['similarity_score']:.4f}")
-            output.append(f"   🏆 Hybrid Score: {result['hybrid_score']:.4f}")
-            output.append(f"   📄 {result['overview']}")
-            output.append("")
-        return "\n".join(output)
     def _create_results_dataframe(self, search_results: dict) -> pd.DataFrame:
         if not search_results["results"]:
             return pd.DataFrame()
@@ -326,20 +340,21 @@ class RecommendationEngine:
         for result in search_results["results"]:
             df_data.append(
                 {
-                    "ImdbId": result["tconst"],
-                    "Title": result["title"],
-                    "Type": result["type"],
-                    "Year": result["year"],
-                    "Rating": result["rating"],
-                    "RuntimeMinutes": result["runtimeMinutes"],
-                    "Votes": result["votes"],
-                    "Genres": result["genres"],
-                    "Similarity": f"{result['similarity_score']:.4f}",
-                    "Hybrid Score": f"{result['hybrid_score']:.4f}",
-                    "Overview": result["overview"],
-                    "Final Score": f"{result['final_score']:.4f}",
-                    "Genre Score": f"{result['genre_score']:.4f}",
-                    "Poster Url": result["poster_url"],
                 }
             )
         return pd.DataFrame(df_data)

                     raise similarity_error
             print(f"🔍 Found {len(search_results['results'])} results.")
             print("📊 Creating results dataframe...")
             start_time = time.time()
             print(f"✅ Dataframe created in {df_time:.4f} seconds")
             print("🎉 Recommendation process completed successfully!")
+            return features.prompt_title, results_df
         except Exception as e:
             print(f"❌ Critical error in recommendation process: {str(e)}")
                                     ---
                                     ### GENRES
+                                    - If the user mentions a specific movie/show, extract its ACTUAL genres (e.g., IMDb/TMDB genres). (Example if user wants anime, select animation etc.)
                                     - If unsure, infer 1–2 of the most likely/popular genres.
                                     - If user directly mentions genres, match exactly from the allowed genre list.
                                     - Prefer accuracy over guessing; leave empty if absolutely no genre can be inferred.
                                     ✅ GOOD THEMES EXAMPLES:
                                     - “In 1970s New York, a Mafia don must navigate betrayal and FBI pressure to hold his criminal empire together.”
                                     - “A Mexican drug lord rises to power as DEA agents close in on his cross-border empire.”
+                                    - “New Jersey mob boss Tony Soprano deals with personal and professional issues in his home and business life that affect his mental state, leading him to seek professional psychiatric counseling.“
+                                    - “In an alternative version of 1969, the Soviet Union beats the United States to the Moon, and the space race continues on for decades with still grander challenges and goals.“
+                                    - “When Earth becomes uninhabitable in the future, a farmer and ex-NASA pilot, Joseph Cooper, is tasked to pilot a spacecraft, along with a team of researchers, to find a new planet for humans“
+                                    - “An astronaut becomes stranded on Mars after his team assume him dead, and must rely on his ingenuity to find a way to signal to Earth that he is alive and can survive until a potential rescue.“
                                     ❌ BAD THEMES TO AVOID:
                                     - “A powerful family faces betrayal as they try to protect their empire.” ⟶ Too vague and franchise-prone
                                     - Defaults to `[1900, 2025]` if not constrained.
                                     - "recent", "modern" → prefer `[2010, 2025]`
                                     - "classic", "old" → prefer `[1950, 1995]`
+                                    ---
+                                    ### COUNTRY OF ORIGIN
+                                    Analyze the user's country of origin preference:
+                                    - "Turkish movies", "Türk filmi" → `["Turkey"]`
+                                    - "Hollywood films", "American movies" → `["United States"]`
+                                    - "Bollywood", "Indian cinema" → `["India"]`
+                                    - "French films", "French cinema" → `["France"]`
+                                    - "Korean movies", "K-drama" → `["South Korea"]`
+                                    - "Japanese anime", "Japanese films" → `["Japan"]`
+                                    - "British series", "UK shows" → `["United Kingdom"]`
+                                    - "German films", "German cinema" → `["Germany"]`
+                                    - "Italian movies", "Italian cinema" → `["Italy"]`
+                                    - "Spanish films", "Spanish series" → `["Spain"]`
+                                    - "Russian movies", "Russian cinema" → `["Russia"]`
+                                    - "Chinese films", "Chinese cinema" → `["China"]`
+                                    - "Brazilian movies", "Brazilian cinema" → `["Brazil"]`
+                                    - "Mexican series", "Mexican films" → `["Mexico"]`
+                                    - "Canadian films", "Canadian cinema" → `["Canada"]`
+                                    - "Australian movies", "Australian cinema" → `["Australia"]`
+                                    #### REGIONAL/CULTURAL CLUES:
+                                    - "Nordic noir", "Scandinavian" → `["Norway", "Sweden", "Denmark"]`
+                                    - "European cinema" → `["France", "Germany", "Italy", "Spain", "United Kingdom"]`
+                                    - "Asian cinema" → `["Japan", "South Korea", "China", "India"]`
+                                    - "Latin American" → `["Mexico", "Brazil", "Argentina", "Colombia"]`
+                                    - "Middle Eastern" → `["Turkey", "Iran", "Israel", "Lebanon"]`
+                                    #### PLATFORM/DISTRIBUTOR CLUES:
+                                    - "Netflix original" → Varies by platform, usually `["United States"]`
+                                    - "BBC series" → `["United Kingdom"]`
+                                    - "HBO series" → `["United States"]`
+                                    - "Amazon Prime" → Usually `["United States"]`
+                                    #### DEFAULT BEHAVIORS:
+                                    - No country specified: `[]` (empty list - all countries)
+                                    - Ambiguous expressions: `[]` (empty list)
+                                    - Multiple country preference: Return as list (e.g., `["United States", "United Kingdom"]`)
                                     ---
                                     ### LANGUAGE
                                     If the query is not in English, **translate to English first**, then apply the above rules.
+                                    ### PROMPT TITLE
+                                    Generate a short, clear, and meaningful title for users query.
+                                    ***Critical: Always return title
                                     ---
                 production_region=[],
             )
     def _create_results_dataframe(self, search_results: dict) -> pd.DataFrame:
         if not search_results["results"]:
             return pd.DataFrame()
         for result in search_results["results"]:
             df_data.append(
                 {
+                    "tconst": result["tconst"],
+                    "title": result["title"],
+                    "type": result["type"],
+                    "year": result["year"],
+                    "rating": result["rating"],
+                    "runtimeMinutes": result["runtimeMinutes"],
+                    "votes": result["votes"],
+                    "genres": result["genres"],
+                    "similarity_score": f"{result['similarity_score']:.4f}",
+                    "hybrid_score": f"{result['hybrid_score']:.4f}",
+                    "overview": result["overview"],
+                    "final_score": f"{result['final_score']:.4f}",
+                    "genre_score": f"{result['genre_score']:.4f}",
+                    "poster_url": result["poster_url"],
+                    "country_of_origin": result["country_of_origin"],
                 }
             )
         return pd.DataFrame(df_data)