File size: 2,748 Bytes
f5ce21a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import os
import requests
import pandas as pd
from dotenv import load_dotenv
from urllib.parse import quote_plus

# Must run before the environment lookups below so that values supplied via
# python-dotenv (presumably a local .env file -- confirm for your deployment)
# are visible to os.getenv().
load_dotenv()

# RapidAPI credentials. Either value is None when its variable is not set.
API_KEY = os.getenv("RAPIDAPI_KEY")
API_HOST = os.getenv("RAPIDAPI_HOST")

# Headers attached to every request this module sends.
HEADERS = {"x-rapidapi-host": API_HOST, "x-rapidapi-key": API_KEY}

# Reviews endpoint on the configured host. If RAPIDAPI_HOST is unset this
# becomes "https://None/...", and requests to it will fail.
BASE_URL = f"https://{API_HOST}/v1/app-store-api/reviews"

def fetch_appstore_reviews(app_id, page=1, country="us", lang="en"):
    """Fetch one page of most-recent App Store reviews for ``app_id``.

    Issues a GET to ``BASE_URL`` with the module-level ``HEADERS`` and a
    10-second timeout, asking for reviews sorted by "mostRecent".

    Args:
        app_id: App identifier, sent as the ``id`` query parameter.
        page: Page number to request.
        country: Value of the ``country`` query parameter.
        lang: Value of the ``lang`` query parameter.

    Returns:
        The decoded JSON body on success, or ``None`` if anything raised
        (connection error, non-2xx status, undecodable body, ...). The
        error is printed to stdout rather than propagated.
    """
    query = dict(id=app_id, sort="mostRecent", page=page, country=country, lang=lang)
    try:
        response = requests.get(BASE_URL, headers=HEADERS, params=query, timeout=10)
        # Turn 4xx/5xx responses into exceptions so they take the None path.
        response.raise_for_status()
        payload = response.json()
    except Exception as e:
        # Deliberately best-effort: callers treat None as "no reviews".
        print(f"Exception fetching {app_id}:", e)
        return None
    return payload

def search_appstore_app(query, country="us"):
    """Search the Apple App Store for ``query`` and return the top hit's trackId.

    Issues a GET to the ``/v1/app-store-api/search`` endpoint on ``API_HOST``
    with the module-level ``HEADERS``, a result limit of 1 and a 10-second
    timeout.

    Args:
        query: Search term; converted with ``str()`` before sending.
        country: Value of the ``country`` query parameter.

    Returns:
        The ``trackId`` of the first result, or ``None`` when there are no
        results or when the request / response handling fails. Failures are
        printed to stdout rather than raised.
    """
    try:
        url = f"https://{API_HOST}/v1/app-store-api/search"
        # Send the raw term. requests percent-encodes everything in `params`
        # itself, so pre-encoding with quote_plus() would be encoded a second
        # time ("Candy Crush" -> "Candy+Crush" -> "Candy%2BCrush") and the
        # server would search for the literal text "Candy+Crush".
        params = {"q": str(query), "country": country, "limit": 1}

        resp = requests.get(url, headers=HEADERS, params=params, timeout=10)
        resp.raise_for_status()
        results = resp.json()

        # Some APIs return {"results": [...]}, others just a list
        if isinstance(results, dict) and "results" in results:
            results = results["results"]

        return results[0]["trackId"] if results else None

    except requests.exceptions.RequestException as e:
        # Network failure, timeout, or non-2xx status.
        print(f"Search failed for {query}:", e)
        return None
    except Exception as e:
        # Deliberately broad: an unexpected payload shape (missing "trackId",
        # non-indexable body, undecodable JSON) is treated as "no match".
        print(f"Unexpected error searching {query}:", e)
        return None

def merge_google_apple(google_df):
    """Attach Apple App Store rating summaries to a Google Play DataFrame.

    For each distinct name in ``google_df['App']`` the App Store is searched
    via ``search_appstore_app``; when a match is found, one page of reviews
    is fetched via ``fetch_appstore_reviews`` and summarised. The summary is
    outer-merged onto ``google_df`` by the "App" column, written to
    ``data/cleaned_data/combined_apps.csv`` (relative to the working
    directory; the directory is created if missing) and returned.

    Args:
        google_df: DataFrame with an "App" column of app names.

    Returns:
        ``google_df`` with two extra columns: "AppleRating" (mean review
        score, or ``None`` when no match/reviews) and "AppleReviews"
        (number of reviews fetched, 0 when none).
    """
    apple_data = []
    # Look each distinct name up once. Iterating the raw column would add one
    # Apple row per duplicate name, and merging on "App" with duplicate keys
    # on both sides multiplies rows (k duplicates -> k*k output rows), besides
    # repeating the same API calls.
    for app in google_df["App"].drop_duplicates():
        avg_rating, review_count = None, 0

        app_id = search_appstore_app(app)
        reviews = fetch_appstore_reviews(app_id) if app_id else None
        if reviews:
            # NOTE(review): assumes the reviews payload is a list of dicts
            # carrying a "score" key -- confirm against the API response.
            # Reviews without a score count as 0 in the average.
            ratings = [r.get("score", 0) for r in reviews]
            avg_rating = sum(ratings) / len(ratings)
            review_count = len(reviews)

        apple_data.append({
            "App": app,
            "AppleRating": avg_rating,
            "AppleReviews": review_count,
        })

    # Name the columns explicitly so an empty google_df still yields a frame
    # with an "App" column; otherwise the merge below raises KeyError.
    apple_df = pd.DataFrame(
        apple_data, columns=["App", "AppleRating", "AppleReviews"]
    )
    combined = pd.merge(google_df, apple_df, on="App", how="outer")
    os.makedirs("data/cleaned_data", exist_ok=True)
    combined.to_csv("data/cleaned_data/combined_apps.csv", index=False)
    return combined