Liorlsa9 committed on
Commit
a5be4c1
·
1 Parent(s): ee97e75

added files methods and cleanup

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. README.md +44 -1
  3. app.py +46 -75
  4. requirements.txt +6 -1
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
README.md CHANGED
@@ -10,4 +10,47 @@ pinned: false
10
  license: mit
11
  ---
12
 
13
- An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  license: mit
11
  ---
12
 
13
+ # Market Analysis Tool (Hugging Face Spaces)
14
+
15
+ This app provides competitive intelligence for small businesses using Gradio and OpenAI. It finds competitors in a given city and business category, scrapes their websites, and provides actionable business improvement suggestions.
16
+
17
+ ## Setup Instructions
18
+
19
+ 1. **Clone or upload this repository to Hugging Face Spaces.**
20
+
21
+ 2. **Create a `.env` file at the project root with your API keys:**
22
+
23
+ ```
24
+ OPENAI_API_KEY=your_openai_api_key_here
25
+ GEO_API_KEY=your_geoapify_api_key_here
26
+ ```
27
+
28
+ (If the repository includes a `.env.example` file, you can copy it as a template; otherwise create `.env` by hand with the two keys above.)
29
+
30
+ 3. **Install dependencies:**
31
+
32
+ Hugging Face Spaces will automatically install from `requirements.txt`. If running locally:
33
+
34
+ ```
35
+ pip install -r requirements.txt
36
+ ```
37
+
38
+ 4. **Run the app:**
39
+
40
+ ```
41
+ python app.py
42
+ ```
43
+
44
+ or, on Hugging Face Spaces, it will launch automatically.
45
+
46
+ ## Usage
47
+
48
+ - Enter your business name and city (city lookup is currently restricted to Israel, e.g. Netivot).
49
+ - The app will find competitors, analyze their websites, and suggest improvements for your business.
50
+
51
+ ## Environment Variables
52
+ - `OPENAI_API_KEY`: Your OpenAI API key
53
+ - `GEO_API_KEY`: Your Geoapify API key
54
+
55
+ ## License
56
+ MIT
app.py CHANGED
@@ -1,33 +1,13 @@
1
- # -*- coding: utf-8 -*-
2
- """synthetic_data_generator.ipynb
3
-
4
- Automatically generated by Colab.
5
-
6
- Original file is located at
7
- https://colab.research.google.com/drive/1Favva8SJYH_uFh8AuoVhRZnmyjJrTP8c
8
-
9
- # Week 3 project - Create dataset about competitors
10
-
11
- # Brief - a research tool for businesses about their client in their area
12
-
13
- The tool will:
14
-
15
-
16
- 1. Find businesses across the same location using google maps.
17
- 2. Compare business plans and services
18
- 3. advise and help to client to imporve their bussiness accroding to the their competitors
19
-
20
- # imports and installations
21
- """
22
-
23
- !pip install bs4 openai google-api-python-client gradio
24
-
25
- """**Define categories**"""
26
 
27
  from openai import OpenAI
28
- from google.colab import userdata
29
  import json
30
  import gradio as gr
 
 
 
 
31
 
32
  categories = """
33
  `accommodation`
@@ -748,32 +728,31 @@ categories = """
748
  """
749
 
750
 
751
- openai_key = userdata.get('OPENAI_API_KEY')
752
- geo_api_key = userdata.get('GEO_API_KEY')
753
 
754
  import requests
755
  from requests.structures import CaseInsensitiveDict
756
 
757
- def get_competitors_data(category="commercial",limit=50,place_id="51d8aaf091586a414059288705ad76154040f00102f9015f13990300000000c002089203084865727a6c697961"):
758
- print(f"get_competitors_data: category-{category} place_id={place_id}")
759
- url = f"https://api.geoapify.com/v2/places?categories={category}&filter=place:{place_id}&limit={limit}&apiKey={geo_api_key}"
760
- response = requests.get(url)
761
- result = response.json()
762
- websites = []
763
- print(f"result: {result}")
764
- print(result.get("features"))
765
- for item in result["features"]:
766
- if "website" in item["properties"] and item["properties"]["website"]:
767
- websites.append(item["properties"]["website"])
768
- return websites
769
 
770
- def get_place_id(city):
771
- print(f"get_place_id city: {city}")
 
 
 
 
 
 
 
 
772
 
773
- url = f"https://api.geoapify.com/v1/geocode/search?text={city}&filter=countrycode:il&apiKey={geo_api_key}"
774
- response = requests.get(url)
775
- place_id = response.json()['features'][0].get("properties")['place_id']
776
- return place_id
 
 
777
 
778
  import re
779
  from urllib.parse import urlparse
@@ -812,14 +791,14 @@ from bs4 import BeautifulSoup
812
  from urllib.parse import urlparse, urljoin
813
 
814
  def extract_data(websites):
815
- print(f"extract_data: {websites}")
816
- websites_data = []
817
- for website in websites:
818
- if is_business_website(website):
819
- homepage = get_homepage_url(website)
820
- data = extract_and_clean_website_data(homepage, base_url=None)
821
- websites_data = {"url":website, "data":data}
822
- return websites_data
823
 
824
  def extract_and_clean_website_data(url, base_url=None):
825
 
@@ -852,7 +831,6 @@ def extract_and_clean_website_data(url, base_url=None):
852
 
853
  if base_url is None:
854
  base_url = urlparse(url).netloc
855
- print(base_url)
856
  if not base_url.startswith("http"):
857
  base_url = f"{urlparse(url).scheme}://{base_url}"
858
 
@@ -878,24 +856,14 @@ You are a market analysis agent specializing in competitive intelligence for sma
878
  Here is a comprehensive list of supported categories from the Geoapify API. When calling the tool,
879
  choose the most appropriate category that best describes the user's business to find relevant competitors in their area.
880
 
881
- **Geoapify API Supported Categories:**
882
  # %s
883
 
884
- **Example Usage:**
885
 
886
- If the user's business is a "pizza place," you would use the category `catering.restaurant.pizza,catering.restaurant.italian,catering.restaurant` (use several categories to find more businesses) with the `get_competitors_data` tool. If it's a "clothing store for women," you would use `commercial.clothing.women`.
887
 
888
  Remember to choose the most specific and relevant category for the user's business to get the most accurate competitor data. If you are unsure, you can ask the user for clarification on their business type.
889
- ```
890
-
891
-
892
- <tools>
893
- 'get_place_id': This tool retrieves a unique place identifier based on a provided city name. The 'city' parameter should be a string representing the target city (e.g., "Netivot"). The output is a string representing the place ID (e.g., "ChIJD6pJnvN9AhURN9WyDAkoA_Y" for Netivot).
894
-
895
- 'get_competitors_data': This tool identifies and retrieves relevant data for competitors within the specified geographical area (obtained using 'get_place_id') and business category. The business category should be inferred from the user's business name. This tool will utilize a Geoapify API category to search for competitors. The output is a list of dictionaries, where each dictionary contains competitor information, including their website URL (e.g., [{"website": "https://competitor1.com", "location": {...}}, {"website": "https://competitor2.com", "location": {...}}]).
896
-
897
- 'extract_data': This tool scrapes and extracts textual content from a list of competitor websites provided as input (a list of URLs from the 'get_competitors_data' output). The output is a list of dictionaries, where each dictionary contains the original URL and the extracted data from that website (e.g., [{"url": "https://competitor1.com", "data": "Extracted content from competitor 1's website."}, {"url": "https://competitor2.com", "data": "Extracted content from competitor 2's website."}]).
898
- </tools>
899
 
900
  Your workflow should be as follows:
901
 
@@ -906,7 +874,7 @@ Your workflow should be as follows:
906
  5. Call the 'extract_data' tool with the list of competitor website URLs obtained in step 4 to scrape and extract content from each website.
907
  6. Analyze the extracted data from the competitor websites to identify their strengths, weaknesses, offerings, and strategies.
908
  7. Based on your analysis of the competitive landscape and the user's presumed business, generate a concise, actionable list of major improvements the client can implement to enhance their business and attract more customers. Ensure these recommendations are strategic and directly address potential areas for competitive advantage.
909
- ```""" % (categories)
910
 
911
  # Define the function as a tool for the Assistant
912
  get_place_id_tool = {
@@ -981,6 +949,8 @@ extract_data_tool = {
981
  tools = [get_place_id_tool, get_competitors_data_tool, extract_data_tool]
982
 
983
  def message_to_gpt(message, history):
 
 
984
  messages = [{"role": "system", "content": system_message}]
985
 
986
  # Build the message history
@@ -1010,12 +980,16 @@ def message_to_gpt(message, history):
1010
  print(f"Unexpected finish reason: {response.choices[0].finish_reason}")
1011
  break # Or handle differently based on your needs
1012
 
 
 
 
 
 
1013
  # Return the assistant's final response content
1014
  return response.choices[0].message.content
1015
 
1016
  def handle_tool_call(message):
1017
  tool_call = message.tool_calls[0]
1018
- print(f"Inside handle_tool_call with this tool: {tool_call.function.name}")
1019
  arguments = json.loads(tool_call.function.arguments)
1020
 
1021
  if tool_call.function.name == "extract_data":
@@ -1030,7 +1004,6 @@ def handle_tool_call(message):
1030
  elif tool_call.function.name == "get_place_id":
1031
  city = arguments.get("city")
1032
  tool_result = get_place_id(city)
1033
- print(f"tool_result: {tool_result}")
1034
  response = {
1035
  "role": "tool",
1036
  "content": json.dumps({"place_id": tool_result}), # Return place_id as a JSON object
@@ -1058,8 +1031,6 @@ def handle_tool_call(message):
1058
 
1059
  return response
1060
 
1061
-
1062
-
1063
-
1064
  if __name__ == "__main__":
1065
- gr.ChatInterface(fn=message_to_gpt, type="messages").launch(debug=True)
 
1
+ # This app is ready for Hugging Face Spaces. Environment variables are loaded from a .env file.
2
+ # Usage: Set OPENAI_API_KEY and GEO_API_KEY in your environment or in a .env file at the project root.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  from openai import OpenAI
 
5
  import json
6
  import gradio as gr
7
+ import os
8
+ from dotenv import load_dotenv
9
+
10
+ load_dotenv()
11
 
12
  categories = """
13
  `accommodation`
 
728
  """
729
 
730
 
731
+ openai_key = os.environ.get('OPENAI_API_KEY')
732
+ geo_api_key = os.environ.get('GEO_API_KEY')
733
 
734
  import requests
735
  from requests.structures import CaseInsensitiveDict
736
 
737
+ chain_of_thought = []
 
 
 
 
 
 
 
 
 
 
 
738
 
739
def get_competitors_data(category="commercial", limit=50, place_id="51d8aaf091586a414059288705ad76154040f00102f9015f13990300000000c002089203084865727a6c697961", timeout=10):
    """Return the website URLs of places Geoapify lists for a category and place.

    Args:
        category: Geoapify category name; several may be given comma-separated.
        limit: Maximum number of places to request.
        place_id: Geoapify place identifier used in the ``place:`` filter.
        timeout: Seconds to wait for the HTTP response; without it
            ``requests.get`` can block indefinitely.

    Returns:
        A list with the non-empty ``website`` property of every returned
        feature. Empty when no feature has a website or when the response
        carries no ``features`` key (e.g. an API error payload).
    """
    chain_of_thought.append(f"Calling get_competitors_data with category='{category}', limit={limit}, place_id='{place_id}'")
    # Let requests build the query string so the values are URL-encoded
    # instead of being pasted raw into the URL.
    response = requests.get(
        "https://api.geoapify.com/v2/places",
        params={
            "categories": category,
            "filter": f"place:{place_id}",
            "limit": limit,
            "apiKey": geo_api_key,
        },
        timeout=timeout,
    )
    result = response.json()
    features = result.get("features")
    if features is None:
        # Error payloads have no "features"; record it in the trace rather
        # than failing with a KeyError inside the tool call.
        chain_of_thought.append(
            f"get_competitors_data: response had no 'features' (HTTP {response.status_code})"
        )
        return []
    return [
        item["properties"]["website"]
        for item in features
        if item["properties"].get("website")
    ]
749
 
750
def get_place_id(city, timeout=10):
    """Look up the Geoapify place id of a city (search restricted to Israel).

    Args:
        city: Free-text city name passed to the geocoding search.
        timeout: Seconds to wait for the HTTP response; without it
            ``requests.get`` can block indefinitely.

    Returns:
        The ``place_id`` of the first geocoding match, or ``None`` when the
        response contains no features (unknown city or API error payload).
    """
    chain_of_thought.append(f"Calling get_place_id with city='{city}'")
    # Passing params lets requests URL-encode the city text (spaces, '&',
    # non-ASCII characters) instead of splicing it raw into the URL.
    response = requests.get(
        "https://api.geoapify.com/v1/geocode/search",
        params={
            "text": city,
            "filter": "countrycode:il",
            "apiKey": geo_api_key,
        },
        timeout=timeout,
    )
    features = response.json().get("features") or []
    if not features:
        # No match: report it in the trace and hand back None rather than
        # raising IndexError on features[0].
        chain_of_thought.append(f"get_place_id: no match found for city='{city}'")
        return None
    return features[0]["properties"]["place_id"]
756
 
757
  import re
758
  from urllib.parse import urlparse
 
791
  from urllib.parse import urlparse, urljoin
792
 
def extract_data(websites):
    """Scrape each competitor website and collect its cleaned content.

    Args:
        websites: List of competitor website URLs; ``None`` is treated as an
            empty list.

    Returns:
        A list of ``{"url": <original url>, "data": <extracted content>}``
        dicts, one for every URL accepted by ``is_business_website``.
    """
    websites = websites or []
    chain_of_thought.append(f"Calling extract_data for {len(websites)} websites")
    websites_data = []
    for website in websites:
        if not is_business_website(website):
            continue
        homepage = get_homepage_url(website)
        data = extract_and_clean_website_data(homepage, base_url=None)
        # Append to the list; the previous code rebound websites_data to a
        # single dict, so only the last site was ever returned.
        websites_data.append({"url": website, "data": data})
    return websites_data
802
 
803
  def extract_and_clean_website_data(url, base_url=None):
804
 
 
831
 
832
  if base_url is None:
833
  base_url = urlparse(url).netloc
 
834
  if not base_url.startswith("http"):
835
  base_url = f"{urlparse(url).scheme}://{base_url}"
836
 
 
856
  Here is a comprehensive list of supported categories from the Geoapify API. When calling the tool,
857
  choose the most appropriate category that best describes the user's business to find relevant competitors in their area.
858
 
859
+ Geoapify API Supported Categories:
860
  # %s
861
 
862
+ Example Usage:
863
 
864
+ If the user's business is a "pizza place," you would use the category catering.restaurant.pizza,catering.restaurant.italian,catering.restaurant (use several categories to find more businesses) with the get_competitors_data tool. If it's a "clothing store for women," you would use commercial.clothing.women.
865
 
866
  Remember to choose the most specific and relevant category for the user's business to get the most accurate competitor data. If you are unsure, you can ask the user for clarification on their business type.
 
 
 
 
 
 
 
 
 
 
867
 
868
  Your workflow should be as follows:
869
 
 
874
  5. Call the 'extract_data' tool with the list of competitor website URLs obtained in step 4 to scrape and extract content from each website.
875
  6. Analyze the extracted data from the competitor websites to identify their strengths, weaknesses, offerings, and strategies.
876
  7. Based on your analysis of the competitive landscape and the user's presumed business, generate a concise, actionable list of major improvements the client can implement to enhance their business and attract more customers. Ensure these recommendations are strategic and directly address potential areas for competitive advantage.
877
+ """ % (categories)
878
 
879
  # Define the function as a tool for the Assistant
880
  get_place_id_tool = {
 
949
  tools = [get_place_id_tool, get_competitors_data_tool, extract_data_tool]
950
 
951
  def message_to_gpt(message, history):
952
+ global chain_of_thought
953
+ chain_of_thought = []
954
  messages = [{"role": "system", "content": system_message}]
955
 
956
  # Build the message history
 
980
  print(f"Unexpected finish reason: {response.choices[0].finish_reason}")
981
  break # Or handle differently based on your needs
982
 
983
+ # Print the chain of thought for debugging/inspection
984
+ print("Chain of Thought:")
985
+ for step in chain_of_thought:
986
+ print(step)
987
+
988
  # Return the assistant's final response content
989
  return response.choices[0].message.content
990
 
991
  def handle_tool_call(message):
992
  tool_call = message.tool_calls[0]
 
993
  arguments = json.loads(tool_call.function.arguments)
994
 
995
  if tool_call.function.name == "extract_data":
 
1004
  elif tool_call.function.name == "get_place_id":
1005
  city = arguments.get("city")
1006
  tool_result = get_place_id(city)
 
1007
  response = {
1008
  "role": "tool",
1009
  "content": json.dumps({"place_id": tool_result}), # Return place_id as a JSON object
 
1031
 
1032
  return response
1033
 
1034
+ # This app is ready for Hugging Face Spaces. Environment variables are loaded from a .env file.
 
 
1035
  if __name__ == "__main__":
1036
+ gr.ChatInterface(fn=message_to_gpt, type="messages").launch(debug=True)
requirements.txt CHANGED
@@ -1 +1,6 @@
1
- huggingface_hub==0.25.2
 
 
 
 
 
 
1
+ huggingface_hub==0.25.2
2
+ gradio
3
+ openai
4
+ requests
5
+ beautifulsoup4
6
+ python-dotenv