Spaces:
Sleeping
Sleeping
| from langchain_community.tools import TavilySearchResults | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| from langchain_core.prompts import PromptTemplate | |
| from tavily import TavilyClient | |
| import asyncio | |
| import sys | |
| import os | |
| from dotenv import load_dotenv | |
| # files | |
| from crawl import seo_crawling | |
# --- Environment / shared clients ---------------------------------------
# Load API keys from .env; override=True lets .env values win over any
# already-exported environment variables.
load_dotenv(override=True)
tavily_api_key = os.getenv("TAVILY_API_KEY")  # may be None if unset — TODO confirm callers handle that
gemini_api_key = os.getenv("GEMINI_API_KEY")  # may be None if unset
# Shared Gemini chat model, reused by match_title() and keywords() below.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-exp", api_key = gemini_api_key) # type: ignore
| # def tavily_search(query: str): | |
| # tavily = TavilySearchResults( | |
| # max_results=10, | |
| # search_depth="advanced", | |
| # include_answer=True, | |
| # include_images=True, | |
| # include_links=True, # type: ignore | |
| # api_key=tavily_api_key, # type: ignore | |
| # ) | |
| # results = tavily.invoke({"query": f"{query}"}) | |
| # return results | |
| # results = tavily_search("UK Air Source Heat Pump Market Trends 2025") | |
| # print(results) | |
def tavily_search(query):
    """Run a Tavily web search and return the raw result entries.

    Args:
        query: Search query string.

    Returns:
        The ``results`` list from the Tavily API response; each entry is a
        dict that includes at least ``title`` and ``url`` keys.
    """
    client = TavilyClient(api_key=tavily_api_key)
    return client.search(query, max_results=10)["results"]
def match_title(title, titles):
    """Ask the LLM to pick the entry of *titles* that best matches *title*.

    Args:
        title: User-supplied title to match against the candidates.
        titles: List of candidate title strings.

    Returns:
        The LLM response content — expected to be exactly one title name
        copied verbatim from *titles* (callers compare it for equality, so
        any stray whitespace in the model output is their concern).
    """
    template = PromptTemplate.from_template("""
    Your task is to find the title in the List that semantically matches the User_title.
    - Don't change the title name
    - Don't give extra content. Only give the title name.
    - Only give **One title**
    List = {list}
    User_title = {title}
    """)
    rendered = template.invoke({"list": titles, "title": title})
    return llm.invoke(rendered).content
def keywords(content):
    """Extract categorized SEO keywords from *content* via the LLM.

    Args:
        content: Page text (e.g. crawled article body) to analyze.

    Returns:
        The LLM response content: keywords grouped into primary, secondary,
        and local-SEO categories as instructed by the prompt.
    """
    template = PromptTemplate.from_template("""
    # **Instruction:**
    Analyze the given text and extract keywords based on their relevance to SEO. Categorize them into the following three groups:
    ## **1. Primary Keywords (High-Impact, Industry-Specific):**
    - Broad, high-volume search terms that are directly related to the main topic.
    - Common industry terms that people search for when looking for services or information.
    - Maximum **10-12 keywords**.
    ## **2. Secondary Keywords (Supporting SEO & Long-Tail Queries):**
    - More specific, longer phrases related to the main topic.
    - Keywords that provide contextual depth and support for primary keywords.
    - Maximum **10-12 keywords**.
    ## **3. Local SEO Keywords (Boosting Regional Visibility):**
    - Keywords that include location-specific terms.
    - Phrases that help rank in local search results.
    - Maximum **5-8 keywords**.
    # Don't give the extra content only give the SEO keywords
    # **Input:**
    {text}
    """)
    rendered = template.invoke({"text": content})
    return llm.invoke(rendered).content
def seo_keywords(state):
    """Graph node: derive SEO keywords for the topic carried in *state*.

    Pipeline: web-search the topic, ask the LLM which search-result title
    matches it, crawl that result's page, then extract categorized SEO
    keywords from the crawled content.

    Args:
        state: Mapping with a "final_topic" key holding the topic string.

    Returns:
        dict with a single "seo_keywords" key holding the LLM keyword text.

    Raises:
        ValueError: If the web search returns no results at all.
    """
    topic = state["final_topic"]
    results = tavily_search(topic)
    if not results:
        # Nothing to match or crawl — fail loudly instead of crawling "".
        raise ValueError(f"Tavily returned no results for topic: {topic!r}")

    # Single pass over the results: titles for the LLM, plus a title->url map
    # (the original code looped over `results` twice for this).
    titles = [entry["title"] for entry in results]
    url_by_title = {entry["title"]: entry["url"] for entry in results}
    print(titles)

    # LLM output frequently carries a trailing newline or spaces; the exact
    # equality match below would silently fail without stripping.
    matched = str(match_title(topic, titles)).strip()
    print(matched)

    # Fall back to the top search result when the LLM's answer is not an
    # exact title — previously final_url stayed "" and crawling crashed.
    final_url = url_by_title.get(matched, results[0]["url"])
    print(final_url)

    crawled_content = asyncio.run(seo_crawling(final_url))
    response = keywords(crawled_content)
    print(response)
    return {"seo_keywords": response}
| # a = seo_keywords("UK Air Source Heat Pump Market Trends 2025") | |
| # print(a) |