Spaces:
Running
Running
Create geo_agent.py
Browse files- geo_agent.py +114 -0
geo_agent.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os

import requests
from bs4 import BeautifulSoup
from openai import OpenAI

# SECURITY: never commit API keys in source. The key previously hard-coded
# here was exposed in plain text and must be revoked/rotated. Read the key
# from the environment instead (OpenAI() would also pick this variable up
# by default; we pass it explicitly for clarity).
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
| 6 |
+
|
| 7 |
+
def scrape_website(url):
    """Fetch *url* and extract the on-page data needed for a GEO audit.

    Parameters:
        url: fully-qualified URL to fetch (e.g. "https://example.com").

    Returns:
        A tuple ``(analysis_data, content_snippet)``:
        - ``analysis_data``: dict with "URL", "Title", "Meta Description"
          and "Full Content Length" keys on success, or a single-key
          ``{"Error": ...}`` dict if the request fails.
        - ``content_snippet``: extracted page text joined with newlines and
          truncated to 10,000 characters ("" on error).
    """
    try:
        # Identify ourselves politely; some sites reject UA-less requests.
        headers = {'User-Agent': 'Mozilla/5.0 (compatible; GEOAgent/1.0)'}
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # Hoisted: the original looked up soup.find('title') twice.
        title_tag = soup.find('title')
        title = title_tag.text if title_tag else 'No Title Found'

        meta_desc = soup.find('meta', attrs={'name': 'description'})
        meta_desc_content = meta_desc['content'] if meta_desc and meta_desc.get('content') else 'No Meta Description Found'

        # Tags most likely to carry the page's substantive copy.
        main_content_tags = ['p', 'h1', 'h2', 'h3', 'li', 'td']

        main_content = []
        for tag in soup.find_all(main_content_tags):
            text = tag.get_text(strip=True)
            if text and len(text) > 15:  # skip nav crumbs / short fragments
                main_content.append(f"{tag.name}: {text}")

        # Join once; the original joined the list twice (for the length and
        # again for the returned snippet).
        full_content_string = "\n".join(main_content)

        analysis_data = {
            "URL": url,
            "Title": title,
            "Meta Description": meta_desc_content,
            "Full Content Length": len(full_content_string),
        }

        # Cap the snippet so the downstream LLM prompt stays bounded.
        return analysis_data, full_content_string[:10000]

    except requests.exceptions.RequestException as e:
        # Network/HTTP failure: report it in-band rather than raising.
        return {"Error": f"Could not access the website: {e}"}, ""
|
| 40 |
+
|
| 41 |
+
def run_agent_analysis(url, scraped_data_dict, full_content):
    """Ask the LLM for a formatted GEO (Generative Engine Optimization) audit.

    Parameters:
        url: the audited page's URL (interpolated into the prompt).
        scraped_data_dict: dict from scrape_website(); only the 'Title' and
            'Meta Description' keys are read (defaulting to 'N/A').
        full_content: content snippet (max 10k chars) from scrape_website().

    Returns:
        The model's audit text in the fixed "GEO SCORE / PILLAR SCORES /
        DETAILED SUGGESTIONS" format requested by the prompt, or an error
        message string if the API call fails.

    NOTE(review): relies on the module-level `client` (OpenAI) being
    configured with a valid API key.
    """

    # The whole rubric, input data, and required output format are sent as a
    # single system message — no separate user message is provided.
    system_prompt = f"""
    You are the world's leading Generative Engine Optimization (GEO) Expert Agent. Your task is to analyze the provided website content snippet and assign a GEO Score out of 10. The score must be based on the content's readiness to be cited, summarized, and trusted by advanced AI models (like yourself).

    **SCORING CRITERIA (Total 10.0 Points):**

    1. **Citation Readiness (3.0 Points):**
    * *Goal:* Can an AI easily extract a direct, quotable answer?
    * *Check:* Are facts, statistics, and definitions stated concisely, often in the first 150 words? Is the language clear and non-ambiguous?
    2. **Content Structure & Entity Optimization (2.5 Points):**
    * *Goal:* Is the information logically structured for AI chunking and entity recognition?
    * *Check:* Are headings (H1, H2, H3) descriptive and hierarchical? Is the main topic entity (brand, person, product) consistently named and defined?
    3. **E-E-A-T & Trust Signals (2.5 Points):**
    * *Goal:* Does the content demonstrate Experience, Expertise, Authoritativeness, and Trustworthiness?
    * *Check:* Is the content recent? Are clear author/publisher signals present (even if scraped data is limited)? Does the tone sound expert and trustworthy?
    4. **Technical & Schema Potential (2.0 Points):**
    * *Goal:* Is the metadata and on-page structure compliant for rich AI results?
    * *Check:* Is the Title compelling and descriptive? Is the Meta Description a good, concise summary? (Assume basic Schema is absent if not explicitly found).

    **INPUT DATA:**
    ---
    **URL:** {url}
    **Title:** {scraped_data_dict.get('Title', 'N/A')}
    **Meta Description:** {scraped_data_dict.get('Meta Description', 'N/A')}
    **Content Snippet (Max 10k chars):**
    {full_content}
    ---

    **YOUR TASK OUTPUT MUST BE IN THE FOLLOWING FORMAT (Do not include any other text or commentary outside this structure):**

    **GEO SCORE:** [X.X/10.0]

    **PILLAR SCORES:**
    * Citation Readiness: [X.X/3.0]
    * Content Structure & Entity Optimization: [X.X/2.5]
    * E-E-A-T & Trust Signals: [X.X/2.5]
    * Technical & Schema Potential: [X.X/2.0]

    **DETAILED SUGGESTIONS (Top 3 Priority Improvements):**
    1. **[Pillar Name]:** Actionable step to improve the score.
    2. **[Pillar Name]:** Actionable step to improve the score.
    3. **[Pillar Name]:** Actionable step to improve the score.
    """

    try:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_prompt}
            ]
        )
        # First (and only) choice carries the formatted audit text.
        return completion.choices[0].message.content

    except Exception as e:
        # Broad catch at the API boundary: surface any SDK/network failure
        # as an in-band message rather than crashing the CLI.
        return f"An error occurred during AI processing: {e}"
|
| 97 |
+
|
| 98 |
+
if __name__ == "__main__":
    # Interactive entry point: prompt for a URL, scrape it, then print the
    # LLM-generated GEO audit (or the scraper's error message).
    target_url = input("Enter the URL of the website to analyze (e.g., https://example.com): ")

    scraped, page_text = scrape_website(target_url)

    if "Error" in scraped:
        print(scraped["Error"])
    else:
        print(f"\n--- Running GEO Agent Analysis for: {target_url} ---")

        audit = run_agent_analysis(target_url, scraped, page_text)

        banner = "=" * 50
        print("\n" + banner)
        print(" GEO AGENT AUDIT COMPLETE")
        print(banner + "\n")
        print(audit)
        print("\n" + banner)
|