Spaces:
Runtime error
Runtime error
added files methods and cleanup
Browse files- .gitignore +1 -0
- README.md +44 -1
- app.py +46 -75
- requirements.txt +6 -1
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
.env
|
README.md
CHANGED
|
@@ -10,4 +10,47 @@ pinned: false
|
|
| 10 |
license: mit
|
| 11 |
---
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
license: mit
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# Market Analysis Tool (Hugging Face Spaces)
|
| 14 |
+
|
| 15 |
+
This app provides competitive intelligence for small businesses using Gradio and OpenAI. It finds competitors in a given city and business category, scrapes their websites, and provides actionable business improvement suggestions.
|
| 16 |
+
|
| 17 |
+
## Setup Instructions
|
| 18 |
+
|
| 19 |
+
1. **Clone or upload this repository to Hugging Face Spaces.**
|
| 20 |
+
|
| 21 |
+
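2. **Provide your API keys. For local runs, create a `.env` file at the project root with the keys below (on Hugging Face Spaces, add the same names as Space secrets instead, because `.env` is git-ignored and is not uploaded):**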
|
| 22 |
+
|
| 23 |
+
```
|
| 24 |
+
OPENAI_API_KEY=your_openai_api_key_here
|
| 25 |
+
GEO_API_KEY=your_geoapify_api_key_here
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
(You can copy `.env.example` as a template.)
|
| 29 |
+
|
| 30 |
+
3. **Install dependencies:**
|
| 31 |
+
|
| 32 |
+
Hugging Face Spaces will automatically install from `requirements.txt`. If running locally:
|
| 33 |
+
|
| 34 |
+
```
|
| 35 |
+
pip install -r requirements.txt
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
4. **Run the app:**
|
| 39 |
+
|
| 40 |
+
```
|
| 41 |
+
python app.py
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
or, on Hugging Face Spaces, it will launch automatically.
|
| 45 |
+
|
| 46 |
+
## Usage
|
| 47 |
+
|
| 48 |
+
- Enter your business name and city (city lookup is currently restricted to Israel, e.g. Netivot).
|
| 49 |
+
- The app will find competitors, analyze their websites, and suggest improvements for your business.
|
| 50 |
+
|
| 51 |
+
## Environment Variables
|
| 52 |
+
- `OPENAI_API_KEY`: Your OpenAI API key
|
| 53 |
+
- `GEO_API_KEY`: Your Geoapify API key
|
| 54 |
+
|
| 55 |
+
## License
|
| 56 |
+
MIT
|
app.py
CHANGED
|
@@ -1,33 +1,13 @@
|
|
| 1 |
-
#
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
Automatically generated by Colab.
|
| 5 |
-
|
| 6 |
-
Original file is located at
|
| 7 |
-
https://colab.research.google.com/drive/1Favva8SJYH_uFh8AuoVhRZnmyjJrTP8c
|
| 8 |
-
|
| 9 |
-
# Week 3 project - Create dataset about competitors
|
| 10 |
-
|
| 11 |
-
# Brief - a research tool for businesses about their client in their area
|
| 12 |
-
|
| 13 |
-
The tool will:
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
1. Find businesses across the same location using google maps.
|
| 17 |
-
2. Compare business plans and services
|
| 18 |
-
3. advise and help to client to imporve their bussiness accroding to the their competitors
|
| 19 |
-
|
| 20 |
-
# imports and installations
|
| 21 |
-
"""
|
| 22 |
-
|
| 23 |
-
!pip install bs4 openai google-api-python-client gradio
|
| 24 |
-
|
| 25 |
-
"""**Define categories**"""
|
| 26 |
|
| 27 |
from openai import OpenAI
|
| 28 |
-
from google.colab import userdata
|
| 29 |
import json
|
| 30 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
categories = """
|
| 33 |
`accommodation`
|
|
@@ -748,32 +728,31 @@ categories = """
|
|
| 748 |
"""
|
| 749 |
|
| 750 |
|
| 751 |
-
openai_key =
|
| 752 |
-
geo_api_key =
|
| 753 |
|
| 754 |
import requests
|
| 755 |
from requests.structures import CaseInsensitiveDict
|
| 756 |
|
| 757 |
-
|
| 758 |
-
print(f"get_competitors_data: category-{category} place_id={place_id}")
|
| 759 |
-
url = f"https://api.geoapify.com/v2/places?categories={category}&filter=place:{place_id}&limit={limit}&apiKey={geo_api_key}"
|
| 760 |
-
response = requests.get(url)
|
| 761 |
-
result = response.json()
|
| 762 |
-
websites = []
|
| 763 |
-
print(f"result: {result}")
|
| 764 |
-
print(result.get("features"))
|
| 765 |
-
for item in result["features"]:
|
| 766 |
-
if "website" in item["properties"] and item["properties"]["website"]:
|
| 767 |
-
websites.append(item["properties"]["website"])
|
| 768 |
-
return websites
|
| 769 |
|
| 770 |
-
def
|
| 771 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 772 |
|
| 773 |
-
|
| 774 |
-
|
| 775 |
-
|
| 776 |
-
|
|
|
|
|
|
|
| 777 |
|
| 778 |
import re
|
| 779 |
from urllib.parse import urlparse
|
|
@@ -812,14 +791,14 @@ from bs4 import BeautifulSoup
|
|
| 812 |
from urllib.parse import urlparse, urljoin
|
| 813 |
|
| 814 |
def extract_data(websites):
|
| 815 |
-
|
| 816 |
-
|
| 817 |
-
|
| 818 |
-
|
| 819 |
-
|
| 820 |
-
|
| 821 |
-
|
| 822 |
-
|
| 823 |
|
| 824 |
def extract_and_clean_website_data(url, base_url=None):
|
| 825 |
|
|
@@ -852,7 +831,6 @@ def extract_and_clean_website_data(url, base_url=None):
|
|
| 852 |
|
| 853 |
if base_url is None:
|
| 854 |
base_url = urlparse(url).netloc
|
| 855 |
-
print(base_url)
|
| 856 |
if not base_url.startswith("http"):
|
| 857 |
base_url = f"{urlparse(url).scheme}://{base_url}"
|
| 858 |
|
|
@@ -878,24 +856,14 @@ You are a market analysis agent specializing in competitive intelligence for sma
|
|
| 878 |
Here is a comprehensive list of supported categories from the Geoapify API. When calling the tool,
|
| 879 |
choose the most appropriate category that best describes the user's business to find relevant competitors in their area.
|
| 880 |
|
| 881 |
-
|
| 882 |
# %s
|
| 883 |
|
| 884 |
-
|
| 885 |
|
| 886 |
-
If the user's business is a "pizza place," you would use the category
|
| 887 |
|
| 888 |
Remember to choose the most specific and relevant category for the user's business to get the most accurate competitor data. If you are unsure, you can ask the user for clarification on their business type.
|
| 889 |
-
```
|
| 890 |
-
|
| 891 |
-
|
| 892 |
-
<tools>
|
| 893 |
-
'get_place_id': This tool retrieves a unique place identifier based on a provided city name. The 'city' parameter should be a string representing the target city (e.g., "Netivot"). The output is a string representing the place ID (e.g., "ChIJD6pJnvN9AhURN9WyDAkoA_Y" for Netivot).
|
| 894 |
-
|
| 895 |
-
'get_competitors_data': This tool identifies and retrieves relevant data for competitors within the specified geographical area (obtained using 'get_place_id') and business category. The business category should be inferred from the user's business name. This tool will utilize a Geoapify API category to search for competitors. The output is a list of dictionaries, where each dictionary contains competitor information, including their website URL (e.g., [{"website": "https://competitor1.com", "location": {...}}, {"website": "https://competitor2.com", "location": {...}}]).
|
| 896 |
-
|
| 897 |
-
'extract_data': This tool scrapes and extracts textual content from a list of competitor websites provided as input (a list of URLs from the 'get_competitors_data' output). The output is a list of dictionaries, where each dictionary contains the original URL and the extracted data from that website (e.g., [{"url": "https://competitor1.com", "data": "Extracted content from competitor 1's website."}, {"url": "https://competitor2.com", "data": "Extracted content from competitor 2's website."}]).
|
| 898 |
-
</tools>
|
| 899 |
|
| 900 |
Your workflow should be as follows:
|
| 901 |
|
|
@@ -906,7 +874,7 @@ Your workflow should be as follows:
|
|
| 906 |
5. Call the 'extract_data' tool with the list of competitor website URLs obtained in step 4 to scrape and extract content from each website.
|
| 907 |
6. Analyze the extracted data from the competitor websites to identify their strengths, weaknesses, offerings, and strategies.
|
| 908 |
7. Based on your analysis of the competitive landscape and the user's presumed business, generate a concise, actionable list of major improvements the client can implement to enhance their business and attract more customers. Ensure these recommendations are strategic and directly address potential areas for competitive advantage.
|
| 909 |
-
|
| 910 |
|
| 911 |
# Define the function as a tool for the Assistant
|
| 912 |
get_place_id_tool = {
|
|
@@ -981,6 +949,8 @@ extract_data_tool = {
|
|
| 981 |
tools = [get_place_id_tool, get_competitors_data_tool, extract_data_tool]
|
| 982 |
|
| 983 |
def message_to_gpt(message, history):
|
|
|
|
|
|
|
| 984 |
messages = [{"role": "system", "content": system_message}]
|
| 985 |
|
| 986 |
# Build the message history
|
|
@@ -1010,12 +980,16 @@ def message_to_gpt(message, history):
|
|
| 1010 |
print(f"Unexpected finish reason: {response.choices[0].finish_reason}")
|
| 1011 |
break # Or handle differently based on your needs
|
| 1012 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1013 |
# Return the assistant's final response content
|
| 1014 |
return response.choices[0].message.content
|
| 1015 |
|
| 1016 |
def handle_tool_call(message):
|
| 1017 |
tool_call = message.tool_calls[0]
|
| 1018 |
-
print(f"Inside handle_tool_call with this tool: {tool_call.function.name}")
|
| 1019 |
arguments = json.loads(tool_call.function.arguments)
|
| 1020 |
|
| 1021 |
if tool_call.function.name == "extract_data":
|
|
@@ -1030,7 +1004,6 @@ def handle_tool_call(message):
|
|
| 1030 |
elif tool_call.function.name == "get_place_id":
|
| 1031 |
city = arguments.get("city")
|
| 1032 |
tool_result = get_place_id(city)
|
| 1033 |
-
print(f"tool_result: {tool_result}")
|
| 1034 |
response = {
|
| 1035 |
"role": "tool",
|
| 1036 |
"content": json.dumps({"place_id": tool_result}), # Return place_id as a JSON object
|
|
@@ -1058,8 +1031,6 @@ def handle_tool_call(message):
|
|
| 1058 |
|
| 1059 |
return response
|
| 1060 |
|
| 1061 |
-
|
| 1062 |
-
|
| 1063 |
-
|
| 1064 |
if __name__ == "__main__":
|
| 1065 |
-
gr.ChatInterface(fn=message_to_gpt, type="messages").launch(debug=True)
|
|
|
|
| 1 |
+
# This app is ready for Hugging Face Spaces. API keys are read from the process environment; a local .env file, if present, is loaded first via python-dotenv.
|
| 2 |
+
# Usage: Set OPENAI_API_KEY and GEO_API_KEY in your environment or in a .env file at the project root.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
from openai import OpenAI
|
|
|
|
| 5 |
import json
|
| 6 |
import gradio as gr
|
| 7 |
+
import os
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
|
| 10 |
+
load_dotenv()
|
| 11 |
|
| 12 |
categories = """
|
| 13 |
`accommodation`
|
|
|
|
| 728 |
"""
|
| 729 |
|
| 730 |
|
| 731 |
+
openai_key = os.environ.get('OPENAI_API_KEY')
|
| 732 |
+
geo_api_key = os.environ.get('GEO_API_KEY')
|
| 733 |
|
| 734 |
import requests
|
| 735 |
from requests.structures import CaseInsensitiveDict
|
| 736 |
|
| 737 |
+
chain_of_thought = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 738 |
|
| 739 |
+
def get_competitors_data(category="commercial", limit=50, place_id="51d8aaf091586a414059288705ad76154040f00102f9015f13990300000000c002089203084865727a6c697961"):
    """Return the website URLs of businesses Geoapify lists for a category in a place.

    Args:
        category: Geoapify category string; several categories may be given
            comma-separated.
        limit: Maximum number of places to request from the API.
        place_id: Geoapify place identifier that bounds the search.
            NOTE(review): the trailing bytes of the default decode to
            "Herzliya" -- confirm that this is the intended fallback city.

    Returns:
        A list with the non-empty ``website`` property of every returned
        feature (empty when nothing matched).

    Raises:
        requests.HTTPError: if Geoapify answers with an error status.
    """
    chain_of_thought.append(f"Calling get_competitors_data with category='{category}', limit={limit}, place_id='{place_id}'")
    # Let requests build the query string so every value is URL-encoded,
    # and bound the call so a stalled request cannot hang the chat turn.
    response = requests.get(
        "https://api.geoapify.com/v2/places",
        params={
            "categories": category,
            "filter": f"place:{place_id}",
            "limit": limit,
            "apiKey": geo_api_key,
        },
        timeout=30,
    )
    # Surface API errors (bad key, bad category) explicitly instead of
    # failing later with a KeyError on a payload that has no "features".
    response.raise_for_status()
    result = response.json()
    websites = []
    for item in result.get("features") or []:
        website = (item.get("properties") or {}).get("website")
        if website:
            websites.append(website)
    return websites
|
| 749 |
|
| 750 |
+
def get_place_id(city):
    """Look up the Geoapify place id of a city.

    The geocoding request is filtered to country code ``il``.

    Args:
        city: Free-text city name to geocode.

    Returns:
        The ``place_id`` of the first geocoding match, or ``None`` when the
        search returns no match (or the match carries no ``place_id``).

    Raises:
        requests.HTTPError: if Geoapify answers with an error status.
    """
    chain_of_thought.append(f"Calling get_place_id with city='{city}'")
    # Pass the city through ``params`` so spaces and non-ASCII names are
    # URL-encoded, and bound the call with a timeout.
    response = requests.get(
        "https://api.geoapify.com/v1/geocode/search",
        params={
            "text": city,
            "filter": "countrycode:il",
            "apiKey": geo_api_key,
        },
        timeout=30,
    )
    response.raise_for_status()
    features = response.json().get("features") or []
    if not features:
        # Unknown city: report "no id" rather than raising IndexError.
        return None
    place_id = (features[0].get("properties") or {}).get("place_id")
    return place_id
|
| 756 |
|
| 757 |
import re
|
| 758 |
from urllib.parse import urlparse
|
|
|
|
| 791 |
from urllib.parse import urlparse, urljoin
|
| 792 |
|
| 793 |
def extract_data(websites):
    """Scrape the homepage of each business website in *websites*.

    URLs rejected by ``is_business_website`` are skipped.

    Args:
        websites: List of website URLs.

    Returns:
        A list of ``{"url": <original url>, "data": <extracted content>}``
        dictionaries, one per accepted website, in input order.
    """
    chain_of_thought.append(f"Calling extract_data for {len(websites)} websites")
    websites_data = []
    for website in websites:
        if not is_business_website(website):
            continue
        homepage = get_homepage_url(website)
        data = extract_and_clean_website_data(homepage, base_url=None)
        # Append instead of rebinding: the previous assignment replaced the
        # list with a single dict, so only the last site was ever returned.
        websites_data.append({"url": website, "data": data})
    return websites_data
|
| 802 |
|
| 803 |
def extract_and_clean_website_data(url, base_url=None):
|
| 804 |
|
|
|
|
| 831 |
|
| 832 |
if base_url is None:
|
| 833 |
base_url = urlparse(url).netloc
|
|
|
|
| 834 |
if not base_url.startswith("http"):
|
| 835 |
base_url = f"{urlparse(url).scheme}://{base_url}"
|
| 836 |
|
|
|
|
| 856 |
Here is a comprehensive list of supported categories from the Geoapify API. When calling the tool,
|
| 857 |
choose the most appropriate category that best describes the user's business to find relevant competitors in their area.
|
| 858 |
|
| 859 |
+
Geoapify API Supported Categories:
|
| 860 |
# %s
|
| 861 |
|
| 862 |
+
Example Usage:
|
| 863 |
|
| 864 |
+
If the user's business is a "pizza place," you would use the category catering.restaurant.pizza,catering.restaurant.italian,catering.restaurant.italian (use several categories to find more businesses) with the get_competitors_data tool. If it's a "clothing store for women," you would use commercial.clothing.women.
|
| 865 |
|
| 866 |
Remember to choose the most specific and relevant category for the user's business to get the most accurate competitor data. If you are unsure, you can ask the user for clarification on their business type.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 867 |
|
| 868 |
Your workflow should be as follows:
|
| 869 |
|
|
|
|
| 874 |
5. Call the 'extract_data' tool with the list of competitor website URLs obtained in step 4 to scrape and extract content from each website.
|
| 875 |
6. Analyze the extracted data from the competitor websites to identify their strengths, weaknesses, offerings, and strategies.
|
| 876 |
7. Based on your analysis of the competitive landscape and the user's presumed business, generate a concise, actionable list of major improvements the client can implement to enhance their business and attract more customers. Ensure these recommendations are strategic and directly address potential areas for competitive advantage.
|
| 877 |
+
""" % (categories)
|
| 878 |
|
| 879 |
# Define the function as a tool for the Assistant
|
| 880 |
get_place_id_tool = {
|
|
|
|
| 949 |
tools = [get_place_id_tool, get_competitors_data_tool, extract_data_tool]
|
| 950 |
|
| 951 |
def message_to_gpt(message, history):
|
| 952 |
+
global chain_of_thought
|
| 953 |
+
chain_of_thought = []
|
| 954 |
messages = [{"role": "system", "content": system_message}]
|
| 955 |
|
| 956 |
# Build the message history
|
|
|
|
| 980 |
print(f"Unexpected finish reason: {response.choices[0].finish_reason}")
|
| 981 |
break # Or handle differently based on your needs
|
| 982 |
|
| 983 |
+
# Print the chain of thought for debugging/inspection
|
| 984 |
+
print("Chain of Thought:")
|
| 985 |
+
for step in chain_of_thought:
|
| 986 |
+
print(step)
|
| 987 |
+
|
| 988 |
# Return the assistant's final response content
|
| 989 |
return response.choices[0].message.content
|
| 990 |
|
| 991 |
def handle_tool_call(message):
|
| 992 |
tool_call = message.tool_calls[0]
|
|
|
|
| 993 |
arguments = json.loads(tool_call.function.arguments)
|
| 994 |
|
| 995 |
if tool_call.function.name == "extract_data":
|
|
|
|
| 1004 |
elif tool_call.function.name == "get_place_id":
|
| 1005 |
city = arguments.get("city")
|
| 1006 |
tool_result = get_place_id(city)
|
|
|
|
| 1007 |
response = {
|
| 1008 |
"role": "tool",
|
| 1009 |
"content": json.dumps({"place_id": tool_result}), # Return place_id as a JSON object
|
|
|
|
| 1031 |
|
| 1032 |
return response
|
| 1033 |
|
| 1034 |
+
# This app is ready for Hugging Face Spaces. Environment variables are loaded from a .env file.
|
|
|
|
|
|
|
| 1035 |
if __name__ == "__main__":
|
| 1036 |
+
gr.ChatInterface(fn=message_to_gpt, type="messages").launch(debug=True)
|
requirements.txt
CHANGED
|
@@ -1 +1,6 @@
|
|
| 1 |
-
huggingface_hub==0.25.2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
huggingface_hub==0.25.2
|
| 2 |
+
gradio
|
| 3 |
+
openai
|
| 4 |
+
requests
|
| 5 |
+
beautifulsoup4
|
| 6 |
+
python-dotenv
|