AhsanRazi committed on
Commit
35bcae0
·
verified ·
1 Parent(s): cab1e44

Update seo_keywords.py

Browse files
Files changed (1) hide show
  1. seo_keywords.py +138 -141
seo_keywords.py CHANGED
@@ -1,142 +1,139 @@
1
- from langchain_community.tools import TavilySearchResults
2
- from langchain_google_genai import ChatGoogleGenerativeAI
3
- from langchain_core.prompts import PromptTemplate
4
- from tavily import TavilyClient
5
- import asyncio
6
- import sys
7
-
8
- import os
9
- from dotenv import load_dotenv
10
-
11
- # files
12
- from crawl import seo_crawling
13
-
14
# Secret keys
# Read the .env file; override=True is passed so that values from the file
# take precedence over variables already present in the environment.
load_dotenv(override=True)
tavily_api_key = os.environ.get("TAVILY_API_KEY")
gemini_api_key = os.environ.get("GEMINI_API_KEY")


# Module-level chat model shared by the prompt helpers defined in this file.
llm = ChatGoogleGenerativeAI(  # type: ignore
    model="gemini-2.0-flash-exp",
    api_key=gemini_api_key,
)
21
-
22
-
23
- # def tavily_search(query: str):
24
-
25
- # tavily = TavilySearchResults(
26
- # max_results=10,
27
- # search_depth="advanced",
28
- # include_answer=True,
29
- # include_images=True,
30
- # include_links=True, # type: ignore
31
- # api_key=tavily_api_key, # type: ignore
32
- # )
33
-
34
- # results = tavily.invoke({"query": f"{query}"})
35
- # return results
36
-
37
- # results = tavily_search("UK Air Source Heat Pump Market Trends 2025")
38
- # print(results)
39
-
40
-
41
def tavily_search(query):
    """Run a Tavily search for *query* and return the ``"results"`` entry.

    A new ``TavilyClient`` is created on every call from the module-level
    ``tavily_api_key``; at most 10 results are requested.
    """
    client = TavilyClient(api_key=tavily_api_key)
    return client.search(query, max_results=10)["results"]
46
-
47
-
48
def match_title(title, titles):
    """Ask the LLM which entry of *titles* semantically matches *title*.

    Args:
        title: The user-supplied title/topic to match.
        titles: Candidate titles (a list of strings) to choose from.

    Returns:
        The chosen title. When the model's reply can be mapped back onto an
        entry of *titles* (ignoring surrounding whitespace, quotes, list
        markers and letter case) that exact entry is returned, so callers
        can compare it with ``==``. Otherwise the whitespace-stripped reply
        is returned. An empty *titles* yields ``""`` without calling the LLM.
    """
    if not titles:
        # Nothing to choose from; avoid asking the model to invent a title.
        return ""

    title_prompt_template = PromptTemplate.from_template("""
    Your task is to find the title in the List that semantically matches the User_title.
    - Don't change the title name
    - Don't give extra content. Only give the title name.
    - Only give **One title**

    List = {list}
    User_title = {title}

    """)

    prompt = title_prompt_template.invoke({"list": titles, "title": title})
    answer = llm.invoke(prompt).content

    if not isinstance(answer, str):
        # Non-text content (e.g. a list of parts) is passed through unchanged.
        return answer

    # Chat models commonly append a newline; strip it so exact matches work.
    answer = answer.strip()
    if answer in titles:
        return answer

    # Tolerate wrapping quotes/markdown/list markers and case differences,
    # but always hand back the canonical entry from *titles*.
    normalized = answer.strip("\"'`*- ").casefold()
    for candidate in titles:
        if str(candidate).strip().casefold() == normalized:
            return candidate

    return answer
63
-
64
-
65
def keywords(content):
    """Ask the LLM to extract categorized SEO keywords from *content*.

    The prompt requests three groups (primary, secondary and local SEO
    keywords). The model's reply content is returned as-is.
    """
    template = PromptTemplate.from_template("""
    # **Instruction:**
    Analyze the given text and extract keywords based on their relevance to SEO. Categorize them into the following three groups:

    ## **1. Primary Keywords (High-Impact, Industry-Specific):**
    - Broad, high-volume search terms that are directly related to the main topic.
    - Common industry terms that people search for when looking for services or information.
    - Maximum **10-12 keywords**.

    ## **2. Secondary Keywords (Supporting SEO & Long-Tail Queries):**
    - More specific, longer phrases related to the main topic.
    - Keywords that provide contextual depth and support for primary keywords.
    - Maximum **10-12 keywords**.

    ## **3. Local SEO Keywords (Boosting Regional Visibility):**
    - Keywords that include location-specific terms.
    - Phrases that help rank in local search results.
    - Maximum **5-8 keywords**.

    # Don't give the extra content only give the SEO keywords

    # **Input:**
    {text}

    """)

    filled_prompt = template.invoke({"text": content})
    return llm.invoke(filled_prompt).content
96
-
97
-
98
-
99
-
100
def seo_keywords(state):
    """Generate SEO keywords for the topic stored in ``state["final_topic"]``.

    Pipeline: search the web for the topic, let the LLM pick the search
    result whose title best matches the topic, crawl that result's URL and
    extract keywords from the crawled content.

    Args:
        state: Mapping that provides the topic under ``"final_topic"``.

    Returns:
        ``{"seo_keywords": <keywords text>}``. When the search yields no
        results, the keywords value is an empty string and nothing is crawled.
    """
    topic = state["final_topic"]

    results = tavily_search(topic)
    if not results:
        # Without results there is no title to match and no URL to crawl.
        print(f"No search results found for topic: {topic}")
        return {"seo_keywords": ""}

    titles = [item["title"] for item in results]
    titles_url = [{"title": item["title"], "url": item["url"]} for item in results]

    print(titles)
    print(titles_url)

    text = match_title(topic, titles)
    print(text)

    # The LLM reply may differ from the stored title by whitespace or case,
    # so compare normalized forms. If nothing matches, fall back to the first
    # search result rather than crawling an empty URL.
    wanted = text.strip().casefold() if isinstance(text, str) else ""
    final_url = next(
        (
            entry["url"]
            for entry in titles_url
            if str(entry["title"]).strip().casefold() == wanted
        ),
        titles_url[0]["url"],
    )

    print(final_url)

    if sys.platform == "win32":
        # Select the Proactor event loop policy on Windows before running
        # the crawler coroutine.
        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

    crawled_content = asyncio.run(seo_crawling(final_url))
    response = keywords(crawled_content)
    print(response)
    return {"seo_keywords": response}
140
-
141
- # a = seo_keywords("UK Air Source Heat Pump Market Trends 2025")
142
  # print(a)
 
1
+ from langchain_community.tools import TavilySearchResults
2
+ from langchain_google_genai import ChatGoogleGenerativeAI
3
+ from langchain_core.prompts import PromptTemplate
4
+ from tavily import TavilyClient
5
+ import asyncio
6
+ import sys
7
+
8
+ import os
9
+ from dotenv import load_dotenv
10
+
11
+ # files
12
+ from crawl import seo_crawling
13
+
14
# Secret keys
# Read the .env file; override=True is passed so that values from the file
# take precedence over variables already present in the environment.
load_dotenv(override=True)
tavily_api_key = os.environ.get("TAVILY_API_KEY")
gemini_api_key = os.environ.get("GEMINI_API_KEY")


# Module-level chat model shared by the prompt helpers defined in this file.
llm = ChatGoogleGenerativeAI(  # type: ignore
    model="gemini-2.0-flash-exp",
    api_key=gemini_api_key,
)
21
+
22
+
23
+ # def tavily_search(query: str):
24
+
25
+ # tavily = TavilySearchResults(
26
+ # max_results=10,
27
+ # search_depth="advanced",
28
+ # include_answer=True,
29
+ # include_images=True,
30
+ # include_links=True, # type: ignore
31
+ # api_key=tavily_api_key, # type: ignore
32
+ # )
33
+
34
+ # results = tavily.invoke({"query": f"{query}"})
35
+ # return results
36
+
37
+ # results = tavily_search("UK Air Source Heat Pump Market Trends 2025")
38
+ # print(results)
39
+
40
+
41
def tavily_search(query):
    """Run a Tavily search for *query* and return the ``"results"`` entry.

    A new ``TavilyClient`` is created on every call from the module-level
    ``tavily_api_key``; at most 10 results are requested.
    """
    client = TavilyClient(api_key=tavily_api_key)
    return client.search(query, max_results=10)["results"]
46
+
47
+
48
def match_title(title, titles):
    """Ask the LLM which entry of *titles* semantically matches *title*.

    Args:
        title: The user-supplied title/topic to match.
        titles: Candidate titles (a list of strings) to choose from.

    Returns:
        The chosen title. When the model's reply can be mapped back onto an
        entry of *titles* (ignoring surrounding whitespace, quotes, list
        markers and letter case) that exact entry is returned, so callers
        can compare it with ``==``. Otherwise the whitespace-stripped reply
        is returned. An empty *titles* yields ``""`` without calling the LLM.
    """
    if not titles:
        # Nothing to choose from; avoid asking the model to invent a title.
        return ""

    title_prompt_template = PromptTemplate.from_template("""
    Your task is to find the title in the List that semantically matches the User_title.
    - Don't change the title name
    - Don't give extra content. Only give the title name.
    - Only give **One title**

    List = {list}
    User_title = {title}

    """)

    prompt = title_prompt_template.invoke({"list": titles, "title": title})
    answer = llm.invoke(prompt).content

    if not isinstance(answer, str):
        # Non-text content (e.g. a list of parts) is passed through unchanged.
        return answer

    # Chat models commonly append a newline; strip it so exact matches work.
    answer = answer.strip()
    if answer in titles:
        return answer

    # Tolerate wrapping quotes/markdown/list markers and case differences,
    # but always hand back the canonical entry from *titles*.
    normalized = answer.strip("\"'`*- ").casefold()
    for candidate in titles:
        if str(candidate).strip().casefold() == normalized:
            return candidate

    return answer
63
+
64
+
65
def keywords(content):
    """Ask the LLM to extract categorized SEO keywords from *content*.

    The prompt requests three groups (primary, secondary and local SEO
    keywords). The model's reply content is returned as-is.
    """
    template = PromptTemplate.from_template("""
    # **Instruction:**
    Analyze the given text and extract keywords based on their relevance to SEO. Categorize them into the following three groups:

    ## **1. Primary Keywords (High-Impact, Industry-Specific):**
    - Broad, high-volume search terms that are directly related to the main topic.
    - Common industry terms that people search for when looking for services or information.
    - Maximum **10-12 keywords**.

    ## **2. Secondary Keywords (Supporting SEO & Long-Tail Queries):**
    - More specific, longer phrases related to the main topic.
    - Keywords that provide contextual depth and support for primary keywords.
    - Maximum **10-12 keywords**.

    ## **3. Local SEO Keywords (Boosting Regional Visibility):**
    - Keywords that include location-specific terms.
    - Phrases that help rank in local search results.
    - Maximum **5-8 keywords**.

    # Don't give the extra content only give the SEO keywords

    # **Input:**
    {text}

    """)

    filled_prompt = template.invoke({"text": content})
    return llm.invoke(filled_prompt).content
96
+
97
+
98
+
99
+
100
def seo_keywords(state):
    """Generate SEO keywords for the topic stored in ``state["final_topic"]``.

    Pipeline: search the web for the topic, let the LLM pick the search
    result whose title best matches the topic, crawl that result's URL and
    extract keywords from the crawled content.

    Args:
        state: Mapping that provides the topic under ``"final_topic"``.

    Returns:
        ``{"seo_keywords": <keywords text>}``. When the search yields no
        results, the keywords value is an empty string and nothing is crawled.
    """
    topic = state["final_topic"]

    results = tavily_search(topic)
    if not results:
        # Without results there is no title to match and no URL to crawl.
        print(f"No search results found for topic: {topic}")
        return {"seo_keywords": ""}

    titles = [item["title"] for item in results]
    titles_url = [{"title": item["title"], "url": item["url"]} for item in results]

    print(titles)
    print(titles_url)

    text = match_title(topic, titles)
    print(text)

    # The LLM reply may differ from the stored title by whitespace or case,
    # so compare normalized forms. If nothing matches, fall back to the first
    # search result rather than crawling an empty URL.
    wanted = text.strip().casefold() if isinstance(text, str) else ""
    final_url = next(
        (
            entry["url"]
            for entry in titles_url
            if str(entry["title"]).strip().casefold() == wanted
        ),
        titles_url[0]["url"],
    )

    print(final_url)

    crawled_content = asyncio.run(seo_crawling(final_url))
    response = keywords(crawled_content)
    print(response)
    return {"seo_keywords": response}
137
+
138
+ # a = seo_keywords("UK Air Source Heat Pump Market Trends 2025")
 
 
 
139
  # print(a)