| from pydantic import BaseModel,Field |
| from typing import TypedDict, Annotated |
| from langgraph.graph import MessagesState,StateGraph, START, END |
| from langchain_community.tools import TavilySearchResults |
| from langchain_core.messages import SystemMessage, HumanMessage, AIMessage |
| from langchain_community.document_loaders import WebBaseLoader |
| from langgraph.checkpoint.memory import MemorySaver |
|
|
|
|
| import operator |
| from setup import * |
|
|
class GeneratorState(MessagesState):
    """Shared graph state for the use-case report pipeline.

    Extends MessagesState (which contributes the `messages` channel used by
    `generate_usecases` for the final report message).
    """
    # Accumulated web-research summaries; the operator.add reducer concatenates
    # the lists returned by nodes instead of overwriting them.
    context : Annotated[list, operator.add]
    # NOTE(review): declared here but never read by any node below, and graph()
    # invokes with a "max_analysts" key instead — confirm the intended key name.
    max_usecase : int
    # Industry/topic string the user asked about (read by both nodes).
    topic : str
|
|
|
|
class SearchQuery(BaseModel):
    """Structured output schema: constrains the LLM to emit one search query string."""
    search_query : str = Field(description = 'Search query for web-search')
|
|
|
|
# Tavily search tool configured for a single, deep result with answer/raw-content.
# NOTE(review): this tool appears unused — `search_web` below calls `tavily_search`
# (presumably imported via `from setup import *`). Confirm which client is intended.
keyword_search = TavilySearchResults(
    max_results=1,
    search_depth="advanced",
    include_answer=True,
    include_raw_content=True,
    include_images=True)
|
|
|
|
|
|
def search_web(state: GeneratorState):
    """Research the state's topic on the web and summarize what was found.

    Pipeline: (1) have llm2 produce a focused Tavily search query for the topic,
    (2) run the Tavily search, (3) load the result pages with WebBaseLoader,
    (4) ask llm2 for a ~500-word summary of the scraped text.

    Returns:
        dict: {'context': [summary_message]} — appended onto state['context']
        by the operator.add reducer declared on GeneratorState.
    """
    topic = state['topic']
    # Constrain the query-generation call to the SearchQuery schema.
    structured_llm = llm2.with_structured_output(SearchQuery)

    search_instructions = """You are an AI assistant specialized in generating effective internet search queries. Your goal is to create **precise, keyword-rich search queries** that retrieve the best results for **AI use cases in specific industries** using Tavily Search.

## **Instructions:**
- Extract the **industry name** from the user’s query.
- Generate a **focused search query** that retrieves **practical AI use cases** in that industry.
- Include keywords like **"applications," "use cases," "impact," or "case studies"** to refine the search.
- Prioritize sources like **research papers, industry reports, and authoritative tech sites**.
- Use **Google-style operators (e.g., `site:`) to focus on trusted sources** if applicable.

---
## **Example:**
User Input: `"GenAI in healthcare"`
Generated Query:
"Generative AI use cases in healthcare applications and impact"

Generate search query for the below:
{topic}
"""

    search_prompt = search_instructions.format(topic=topic)
    search_query = structured_llm.invoke(search_prompt)
    exclude_domains = ["vktr.com"]
    # NOTE(review): `tavily_search` is expected to come from `setup` (star import);
    # the module-level `keyword_search` tool defined above is never used — confirm
    # which client is intended. Also confirm `exclude_domains` is honored by
    # this client's invoke() signature.
    search_docs = tavily_search.invoke(search_query.search_query, exclude_domains=exclude_domains)
    page_url = [doc['url'] for doc in search_docs]
    loader = WebBaseLoader(
        web_paths=page_url,
        bs_get_text_kwargs={"separator": "|", "strip": True},
        raise_for_status=True,
    )
    docs = loader.load()
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document href="{doc.metadata["source"]}"/>\n{doc.page_content}\n</Document>'
            for doc in docs
        ])

    summarization_prompt = '''You are an advanced summarization assistant. Your task is to generate a 500-word summary based on the provided context.
Maintain key information while removing redundancy, preserving critical details, and ensuring readability. Follow these guidelines:
Focus on Key Points: Extract essential facts, insights, and takeaways.
Maintain Clarity & Coherence: Ensure logical flow and readability.
Preserve Critical Data: Retain names, dates, figures, and important references.
Adjust Length as Needed: Summarize concisely while covering all vital aspects.
Format the summary professionally, adapting tone and detail to match the context.
context : {formatted_search_docs}
'''
    # BUG FIX: the template was previously sent to the LLM un-formatted, so the
    # literal "{formatted_search_docs}" placeholder was summarized instead of the
    # scraped pages. Interpolate the documents before invoking.
    summarized_docs = llm2.invoke(
        [SystemMessage(content=summarization_prompt.format(formatted_search_docs=formatted_search_docs))])

    return {'context': [summarized_docs]}
|
|
|
|
|
|
def generate_usecases(state: GeneratorState):
    """Turn the accumulated research context into a structured use-case report.

    Fills the report template with the state's topic and context, sends it to
    llm1 as a system message, and returns the model's answer on the `messages`
    channel (where MessagesState appends it).
    """
    report_template = '''
You are a highly skilled technical writer. Your task is to consolidate insights from analyst memos into a structured report based on the given context and topic. Ensure the report includes a **brief introduction**, at least five AI use cases, and a **short conclusion**. Follow this format:
topic : {topic}
context:
{context}

# Focus Title: [Provided Title]

## Introduction:
Provide a concise overview of the report's purpose and relevance.

## Use Case 1: [Descriptive Title]
**Objective/Use Case:** Summarize the goal in one or two sentences.
**AI Application:** Describe the AI technologies used.

### Cross-Functional Benefit:
- **[Department]:** [Benefit]
- **[Department]:** [Benefit]

## Use Case 2: [Descriptive Title]
(Repeat format)

## Conclusion:
Summarize key takeaways and potential future implications.

Ensure clarity, relevance, and no duplicate citations in the **Sources** section. Extract insights accurately from the **context** provided.'''

    filled_prompt = report_template.format(
        topic=state['topic'],
        context=state['context'],
    )
    report_message = llm1.invoke([SystemMessage(content=filled_prompt)])

    return {'messages': report_message}
|
|
|
|
|
|
def graph(topic, max_analysts):
    """Build, run, and read back the two-node report-generation graph.

    Wires START -> search_web -> usecase_generation -> END, runs it once on a
    fixed thread id with an in-memory checkpointer, and returns the report text.

    Args:
        topic: Industry/topic string stored in state as `topic`.
        max_analysts: Count stored in state as `max_usecase` (not read by the
            current nodes).

    Returns:
        str: Content of the report message produced by `generate_usecases`.
    """
    graph_builder = StateGraph(GeneratorState)

    graph_builder.add_node('search_web', search_web)
    graph_builder.add_node('usecase_generation', generate_usecases)

    graph_builder.add_edge(START, 'search_web')
    graph_builder.add_edge('search_web', 'usecase_generation')
    graph_builder.add_edge('usecase_generation', END)

    memory = MemorySaver()
    # Renamed from `graph` — the original local shadowed this function's name.
    compiled_graph = graph_builder.compile(checkpointer=memory)
    config = {"configurable": {"thread_id": "1"}}
    # BUG FIX: GeneratorState declares `max_usecase`; the previous code invoked
    # with an unknown `max_analysts` key, so the value never entered the state.
    compiled_graph.invoke({"topic": topic,
                           "max_usecase": max_analysts},
                          config)

    final_state = compiled_graph.get_state(config)
    # Only `generate_usecases` writes to `messages`, so the single entry is the report.
    report = final_state.values['messages'][0].content

    return report