|
|
import time |
|
|
from smart_search import SmartSearch |
|
|
import matplotlib.pyplot as plt |
|
|
from sklearn.metrics import precision_recall_fscore_support |
|
|
|
|
|
|
|
|
# Paths handed to SmartSearch; presumably the film and TV-series
# catalogues it searches over (verify against smart_search).
films_file = 'films.json'
tv_series_file = 'tv_series.json'

# The search engine instance that the benchmark in this script exercises.
search_system = SmartSearch(films_file, tv_series_file)
|
|
|
|
|
|
|
|
# Benchmark cases. Each entry holds the query string sent to the search
# system plus the film titles and TV-series titles expected back for it.
# Several queries are truncated forms of a title ("my sp", "funky monk",
# "grand blu"), presumably to exercise partial matching.
# NOTE(review): expected titles must match the data files exactly; confirm
# the spelling/format of entries such as "Yaariayan (2014)" against them.
test_queries = [
    {"query": "my spy", "expected_films": ["My spy 2020", "My spy 2024"], "expected_tv_series": []},
    {"query": "my sp", "expected_films": ["My spy 2020", "My spy 2024"], "expected_tv_series": []},
    {"query": "funky monk", "expected_films": ["Funky Monkey 2004"], "expected_tv_series": []},
    {"query": "yaariyan", "expected_films": ["Yaariayan (2014)", "Yaariyan 2 (2023)"], "expected_tv_series": []},
    {"query": "grand blu", "expected_films": [], "expected_tv_series": ["Grand Blue"]},
    {"query": "aho girl", "expected_films": [], "expected_tv_series": ["Aho Girl"]},
]
|
|
|
|
|
|
|
|
def _retrieval_scores(expected, retrieved):
    """Return set-based ``(precision, recall, f1)`` for one result list.

    ``expected`` and ``retrieved`` are treated as unordered sets of titles,
    so they may differ in length and duplicates are ignored.
    Assumes titles are hashable and compare equal on an exact match
    (TODO confirm against what ``SmartSearch.search`` returns).
    """
    expected_set = set(expected)
    retrieved_set = set(retrieved)

    if not expected_set and not retrieved_set:
        # Nothing was expected and nothing came back: a perfect answer.
        return 1.0, 1.0, 1.0

    hits = len(expected_set & retrieved_set)
    precision = hits / len(retrieved_set) if retrieved_set else 0.0
    recall = hits / len(expected_set) if expected_set else 0.0
    denominator = precision + recall
    f1 = 2 * precision * recall / denominator if denominator else 0.0
    return precision, recall, f1


def benchmark_search(search_system, queries):
    """Time every query and score its results against the expected titles.

    Args:
        search_system: Object with a ``search(query)`` method whose return
            value is indexable by ``'films'`` and ``'tv_series'``.
        queries: Iterable of dicts with the keys ``'query'``,
            ``'expected_films'`` and ``'expected_tv_series'``.

    Returns:
        A ``(detailed_results, avg_time)`` tuple. ``detailed_results`` has
        one dict per query holding the query, the raw search result, the
        elapsed time in seconds, and precision/recall/F1 for films and for
        TV series. ``avg_time`` is the mean elapsed time, or ``0.0`` when
        no queries were run.
    """
    detailed_results = []
    total_time = 0.0

    for test_case in queries:
        query = test_case['query']

        # perf_counter is a monotonic high-resolution clock, so short
        # searches are not distorted by wall-clock adjustments.
        started = time.perf_counter()
        result = search_system.search(query)
        elapsed_time = time.perf_counter() - started
        total_time += elapsed_time

        # Retrieval metrics are computed on title sets. A per-sample
        # classification metric would require both lists to have the same
        # length and would compare them position by position.
        films_precision, films_recall, films_f1 = _retrieval_scores(
            test_case['expected_films'], result['films'])
        tv_series_precision, tv_series_recall, tv_series_f1 = _retrieval_scores(
            test_case['expected_tv_series'], result['tv_series'])

        detailed_results.append({
            "query": query,
            "result": result,
            "elapsed_time": elapsed_time,
            "films_precision": films_precision,
            "films_recall": films_recall,
            "films_f1": films_f1,
            "tv_series_precision": tv_series_precision,
            "tv_series_recall": tv_series_recall,
            "tv_series_f1": tv_series_f1,
        })

    # Divide by the number of queries actually run; guard the empty case.
    avg_time = total_time / len(detailed_results) if detailed_results else 0.0
    return detailed_results, avg_time
|
|
|
|
|
|
|
|
# Run the benchmark once at module level over the predefined test queries.
detailed_results, avg_time = benchmark_search(search_system, test_queries)
|
|
|
|
|
|
|
|
def display_results(detailed_results, avg_time):
    """Print the benchmark results and plot timing and F1-score charts.

    Args:
        detailed_results: List of per-query dicts containing the keys
            ``'query'``, ``'result'`` (indexable by ``'films'`` and
            ``'tv_series'``), ``'elapsed_time'``, and the
            ``films_*`` / ``tv_series_*`` precision, recall and F1 values.
        avg_time: Average search time in seconds, printed as a summary.

    Shows two matplotlib figures in sequence: a horizontal bar chart of
    the time taken per query, then a grouped bar chart of the film and
    TV-series F1 scores per query.
    """
    queries = [entry['query'] for entry in detailed_results]
    times = [entry['elapsed_time'] for entry in detailed_results]
    films_f1_scores = [entry['films_f1'] for entry in detailed_results]
    tv_series_f1_scores = [entry['tv_series_f1'] for entry in detailed_results]

    print(f"Average search time: {avg_time:.4f} seconds\n")

    # Text report: one section per query, separated by a dashed rule.
    for entry in detailed_results:
        print(f"Query: '{entry['query']}'")
        print(f"Time taken: {entry['elapsed_time']:.4f} seconds")
        print("Films found:", entry['result']['films'])
        print("TV Series found:", entry['result']['tv_series'])
        print(f"Films Precision: {entry['films_precision']:.2f}")
        print(f"Films Recall: {entry['films_recall']:.2f}")
        print(f"Films F1 Score: {entry['films_f1']:.2f}")
        print(f"TV Series Precision: {entry['tv_series_precision']:.2f}")
        print(f"TV Series Recall: {entry['tv_series_recall']:.2f}")
        print(f"TV Series F1 Score: {entry['tv_series_f1']:.2f}")
        print("-" * 50)

    # Figure 1: time taken per query.
    plt.figure(figsize=(10, 6))
    plt.barh(queries, times, color='skyblue')
    plt.xlabel('Time (seconds)')
    plt.title('Time Taken for Each Search Query')
    plt.show()

    # Figure 2: film and TV-series F1 scores side by side for each query.
    plt.figure(figsize=(10, 6))
    width = 0.35
    indices = range(len(queries))
    plt.bar(indices, films_f1_scores, width, label='Films F1 Score', color='green')
    # Shift the second series by one bar width so the pairs sit adjacent.
    plt.bar([i + width for i in indices], tv_series_f1_scores, width,
            label='TV Series F1 Score', color='orange')
    # Centre each tick label between the two bars of its pair.
    plt.xticks([i + width / 2 for i in indices], queries)
    plt.xlabel('Queries')
    plt.ylabel('F1 Score')
    plt.title('F1 Scores for Films and TV Series')
    plt.legend(loc='best')
    plt.show()
|
|
|
|
|
|
|
|
# Print the report and show the charts for the benchmark run.
display_results(detailed_results, avg_time)