# last_edit/server/search_intel.py
# Moharek: "Deploy Moharek GEO Platform" (a74b879)
"""Lightweight Search Intelligence helpers.
Provides keyword research and competitor scraping using free tools in this repo:
- uses `src.crawler` to fetch pages
- uses `server.keyword_engine` for keyword extraction
These are intentionally minimal and safe fallbacks when external APIs/keys
are not available.
"""
from typing import List, Dict
import os
try:
from googleapiclient.discovery import build
except Exception:
build = None
from server import keyword_engine
def keywords_from_url(url: str, max_pages: int = 1, top_n: int = 40, enrich: bool = True):
    """Crawl ``url`` and extract keywords from the crawled pages.

    The crawler is imported lazily so that importing this module does not
    require ``src.crawler`` to be present.

    Args:
        url: Seed URL handed to ``crawler.crawl_seed``.
        max_pages: Forwarded to ``crawler.crawl_seed`` as ``max_pages``.
        top_n: Forwarded to ``keyword_engine.extract_keywords_from_audit``.
        enrich: Forwarded to ``keyword_engine.extract_keywords_from_audit``.

    Returns:
        A dict with two keys: ``'keywords'`` (whatever the keyword engine
        returned) and ``'pages'`` (one ``{'url': ..., 'title': ...}`` dict
        per crawled page).

    Raises:
        RuntimeError: If ``src.crawler`` cannot be imported.
    """
    try:
        from src import crawler
    except Exception:
        raise RuntimeError('crawler not available')

    crawled = crawler.crawl_seed(url, max_pages=max_pages)

    # The keyword engine receives the crawl wrapped in an audit-shaped dict.
    extracted = keyword_engine.extract_keywords_from_audit(
        {'pages': crawled},
        top_n=top_n,
        enrich=enrich,
    )

    # Each page is assumed to be dict-like (``.get`` is called on it); only
    # its URL and title are echoed back to the caller.
    page_summaries = [
        {'url': page.get('url'), 'title': page.get('title')}
        for page in crawled
    ]
    return {'keywords': extracted, 'pages': page_summaries}
def _link_host(link):
    """Return the normalized host of an absolute web link, or ``None``.

    ``None`` is returned for anything that does not name a web host:
    non-strings, relative paths, fragments, ``mailto:``/``javascript:`` style
    schemes, and strings ``urlsplit`` rejects. A leading ``www.`` is dropped
    so ``www.example.com`` and ``example.com`` compare equal.
    """
    from urllib.parse import urlsplit  # local import: keeps this block self-contained

    if not isinstance(link, str):
        return None
    try:
        parts = urlsplit(link.strip())
        host = parts.hostname  # already lower-cased by urllib
    except ValueError:
        # urlsplit raises ValueError on malformed netlocs (e.g. bad IPv6).
        return None
    # An empty scheme with a host is a protocol-relative link ("//host/path").
    if parts.scheme not in ('', 'http', 'https') or not host:
        return None
    return host[4:] if host.startswith('www.') else host


def competitor_links(url: str, max_pages: int = 3):
    """Crawl ``url`` and count the links that point to other hosts.

    A link counts as external when it is an absolute (or protocol-relative)
    http(s) link whose host differs from the seed's host. Hosts are compared
    case-insensitively and ignoring a leading ``www.``. This replaces a raw
    ``startswith(url)`` prefix test, which miscounted same-site links with a
    different path/scheme, relative links and ``mailto:`` links as
    competitors, and treated look-alike hosts such as
    ``example.com.evil.io`` as internal.

    Args:
        url: Seed URL handed to ``crawler.crawl_seed``. A scheme-less value
            such as ``"example.com"`` is accepted when working out the host.
        max_pages: Forwarded to ``crawler.crawl_seed`` as ``max_pages``.

    Returns:
        ``{'competitors': [{'url': <link>, 'count': <int>}, ...]}`` sorted by
        descending count; ties keep first-seen order. Links are keyed by
        their exact string as found on the page.

    Raises:
        RuntimeError: If ``src.crawler`` cannot be imported.
    """
    try:
        from src import crawler
    except Exception as exc:
        raise RuntimeError('crawler not available') from exc

    # Fall back to a protocol-relative parse so "example.com" still yields a host.
    seed_host = _link_host(url) or _link_host('//' + url)

    counts = {}
    for page in crawler.crawl_seed(url, max_pages=max_pages):
        # ``or ()`` tolerates a page whose 'links' entry is missing or None.
        for link in page.get('links') or ():
            host = _link_host(link)
            if host is None or host == seed_host:
                continue
            counts[link] = counts.get(link, 0) + 1

    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return {'competitors': [{'url': link, 'count': hits} for link, hits in ranked]}
def gsc_query(site_url: str, start_date: str, end_date: str, row_limit: int = 2500):
    """Run a Search Console search-analytics query, if the client is usable.

    This is a best-effort helper: it never raises. When the Google API
    client is missing, or any step of the call fails, it reports why instead.

    Args:
        site_url: Passed to the API as ``siteUrl``.
        start_date: Sent as ``startDate`` in the request body.
        end_date: Sent as ``endDate`` in the request body.
        row_limit: Sent as ``rowLimit`` in the request body.

    Returns:
        ``{'enabled': True, 'result': <API response>}`` on success, otherwise
        ``{'enabled': False, 'reason': <message>}``.
    """
    if build is None:
        return {'enabled': False, 'reason': 'googleapiclient not installed'}

    # Rows are grouped by the 'query' dimension only.
    request_body = {
        'startDate': start_date,
        'endDate': end_date,
        'dimensions': ['query'],
        'rowLimit': row_limit,
    }

    # No credentials are passed to build(); they are expected to come from the
    # environment (GOOGLE_APPLICATION_CREDENTIALS pointing at a JSON file).
    try:
        client = build('searchconsole', 'v1')
        analytics = client.searchanalytics()
        response = analytics.query(siteUrl=site_url, body=request_body).execute()
    except Exception as err:
        # Deliberate catch-all: any failure degrades to a "disabled" answer.
        return {'enabled': False, 'reason': str(err)}
    return {'enabled': True, 'result': response}