Spaces:
Sleeping
Sleeping
File size: 3,827 Bytes
35bcae0 0777aa4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
from langchain_community.tools import TavilySearchResults
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import PromptTemplate
from tavily import TavilyClient
import asyncio
import sys
import os
from dotenv import load_dotenv
# files
from crawl import seo_crawling
# Secret Key
# Load API keys from a .env file; override=True lets the .env values win over
# any variables already present in the process environment.
load_dotenv(override=True)
tavily_api_key = os.getenv("TAVILY_API_KEY")
gemini_api_key = os.getenv("GEMINI_API_KEY")
# Module-level LLM client shared by match_title() and keywords() below.
# NOTE(review): if GEMINI_API_KEY is missing, api_key is None and calls will
# fail at invoke time rather than here — confirm the key is set in deployment.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-exp", api_key = gemini_api_key) # type: ignore
# def tavily_search(query: str):
# tavily = TavilySearchResults(
# max_results=10,
# search_depth="advanced",
# include_answer=True,
# include_images=True,
# include_links=True, # type: ignore
# api_key=tavily_api_key, # type: ignore
# )
# results = tavily.invoke({"query": f"{query}"})
# return results
# results = tavily_search("UK Air Source Heat Pump Market Trends 2025")
# print(results)
def tavily_search(query, max_results=10):
    """Run a Tavily web search and return the raw result list.

    Args:
        query: Search query string.
        max_results: Maximum number of results to request (default 10,
            matching the previous hard-coded behavior).

    Returns:
        The ``results`` list from the Tavily response; each item is a dict
        containing at least ``title`` and ``url`` keys (consumed by
        ``seo_keywords`` below).
    """
    tavily_client = TavilyClient(api_key=tavily_api_key)
    response = tavily_client.search(query, max_results=max_results)
    return response["results"]
def match_title(title, titles):
    """Ask the LLM to pick the single entry in ``titles`` that semantically
    matches ``title`` and return it verbatim (as the model's text content)."""
    template = PromptTemplate.from_template("""
    Your task is to find the title in the List that semantically matches the User_title.
    - Don't change the title name
    - Don't give extra content. Only give the title name.
    - Only give **One title**
    List = {list}
    User_title = {title}
    """)
    # Render the prompt with the candidate list and the user's title, then
    # hand it to the shared module-level Gemini client.
    rendered = template.invoke({"list": titles, "title": title})
    return llm.invoke(rendered).content
def keywords(content):
    """Extract SEO keywords from ``content`` via the LLM.

    The prompt instructs the model to bucket keywords into primary,
    secondary, and local-SEO groups; the model's raw text content is
    returned unmodified.
    """
    template = PromptTemplate.from_template("""
    # **Instruction:**
    Analyze the given text and extract keywords based on their relevance to SEO. Categorize them into the following three groups:
    ## **1. Primary Keywords (High-Impact, Industry-Specific):**
    - Broad, high-volume search terms that are directly related to the main topic.
    - Common industry terms that people search for when looking for services or information.
    - Maximum **10-12 keywords**.
    ## **2. Secondary Keywords (Supporting SEO & Long-Tail Queries):**
    - More specific, longer phrases related to the main topic.
    - Keywords that provide contextual depth and support for primary keywords.
    - Maximum **10-12 keywords**.
    ## **3. Local SEO Keywords (Boosting Regional Visibility):**
    - Keywords that include location-specific terms.
    - Phrases that help rank in local search results.
    - Maximum **5-8 keywords**.
    # Don't give the extra content only give the SEO keywords
    # **Input:**
    {text}
    """)
    rendered = template.invoke({"text": content})
    return llm.invoke(rendered).content
def seo_keywords(state):
    """Pipeline node: search the topic, pick the best-matching result,
    crawl it, and extract SEO keywords from the crawled content.

    Args:
        state: Mapping with a ``"final_topic"`` key (graph state dict).

    Returns:
        Dict with a single ``"seo_keywords"`` key containing the LLM's
        keyword output.
    """
    topic = state["final_topic"]
    results = tavily_search(topic)
    # One pass over results builds both structures (previously two loops).
    titles = [r['title'] for r in results]
    titles_url = [{"title": r['title'], "url": r['url']} for r in results]
    print(titles)
    print(titles_url)
    # LLM output routinely carries surrounding whitespace / a trailing
    # newline; strip it so the exact-equality match below can succeed.
    matched = str(match_title(topic, titles)).strip()
    print(matched)
    final_url = next(
        (t['url'] for t in titles_url if t['title'] == matched), "")
    if not final_url and titles_url:
        # No exact match from the LLM — fall back to the top search result
        # rather than crawling an empty URL (previous silent-failure mode).
        final_url = titles_url[0]['url']
    print(final_url)
    crawled_content = asyncio.run(seo_crawling(final_url))
    response = keywords(crawled_content)
    print(response)
    return {"seo_keywords": response}
# a = seo_keywords("UK Air Source Heat Pump Market Trends 2025")
# print(a) |