| | import os
|
| | import requests
|
| | import pandas as pd
|
| | from dotenv import load_dotenv
|
| | from urllib.parse import quote_plus
|
| |
|
| | load_dotenv()
|
| | API_KEY = os.getenv("RAPIDAPI_KEY")
|
| | API_HOST = os.getenv("RAPIDAPI_HOST")
|
| |
|
| | HEADERS = {
|
| | "x-rapidapi-host": API_HOST,
|
| | "x-rapidapi-key": API_KEY
|
| | }
|
| |
|
| | BASE_URL = f"https://{API_HOST}/v1/app-store-api/reviews"
|
| |
|
| | def fetch_appstore_reviews(app_id, page=1, country="us", lang="en"):
|
| | params = {
|
| | "id": app_id,
|
| | "sort": "mostRecent",
|
| | "page": page,
|
| | "country": country,
|
| | "lang": lang
|
| | }
|
| | try:
|
| | resp = requests.get(BASE_URL, headers=HEADERS, params=params, timeout=10)
|
| | resp.raise_for_status()
|
| | return resp.json()
|
| | except Exception as e:
|
| | print(f"Exception fetching {app_id}:", e)
|
| | return None
|
| |
|
| | def search_appstore_app(query, country="us"):
|
| | """Search Apple App Store for an app, returning trackId."""
|
| | try:
|
| |
|
| | safe_query = quote_plus(str(query))
|
| |
|
| | url = f"https://{API_HOST}/v1/app-store-api/search"
|
| | params = {"q": safe_query, "country": country, "limit": 1}
|
| |
|
| |
|
| | resp = requests.get(url, headers=HEADERS, params=params, timeout=10)
|
| | resp.raise_for_status()
|
| | results = resp.json()
|
| |
|
| |
|
| | if isinstance(results, dict) and "results" in results:
|
| | results = results["results"]
|
| |
|
| | return results[0]["trackId"] if results else None
|
| |
|
| | except requests.exceptions.RequestException as e:
|
| | print(f"Search failed for {query}:", e)
|
| | return None
|
| | except Exception as e:
|
| | print(f"Unexpected error searching {query}:", e)
|
| | return None
|
| |
|
| | def merge_google_apple(google_df):
|
| | apple_data = []
|
| | for app in google_df['App']:
|
| | app_id = search_appstore_app(app)
|
| | if app_id:
|
| | reviews = fetch_appstore_reviews(app_id)
|
| | if reviews:
|
| | ratings = [r.get("score", 0) for r in reviews]
|
| | avg_rating = sum(ratings)/len(ratings) if ratings else None
|
| | apple_data.append({
|
| | "App": app,
|
| | "AppleRating": avg_rating,
|
| | "AppleReviews": len(reviews)
|
| | })
|
| | continue
|
| |
|
| | apple_data.append({"App": app, "AppleRating": None, "AppleReviews": 0})
|
| |
|
| | apple_df = pd.DataFrame(apple_data)
|
| | combined = pd.merge(google_df, apple_df, on="App", how="outer")
|
| | os.makedirs("data/cleaned_data", exist_ok=True)
|
| | combined.to_csv("data/cleaned_data/combined_apps.csv", index=False)
|
| | return combined
|
| |
|