Spaces:
Sleeping
Sleeping
serp api for gscholar working
Browse files- .gitignore +42 -0
- app.py +163 -0
- better-call-saul-calling-card.jpg +0 -0
- chainlit.md +14 -0
- pyproject.toml +44 -0
- saul/tools.py +70 -0
- uv.lock +0 -0
.gitignore
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Virtual environment
|
| 2 |
+
.venv/
|
| 3 |
+
venv/
|
| 4 |
+
|
| 5 |
+
# Environment variables
|
| 6 |
+
.env
|
| 7 |
+
|
| 8 |
+
# Python
|
| 9 |
+
__pycache__/
|
| 10 |
+
*.py[cod]
|
| 11 |
+
*$py.class
|
| 12 |
+
*.so
|
| 13 |
+
.Python
|
| 14 |
+
build/
|
| 15 |
+
develop-eggs/
|
| 16 |
+
dist/
|
| 17 |
+
downloads/
|
| 18 |
+
eggs/
|
| 19 |
+
.eggs/
|
| 20 |
+
lib/
|
| 21 |
+
lib64/
|
| 22 |
+
parts/
|
| 23 |
+
sdist/
|
| 24 |
+
var/
|
| 25 |
+
wheels/
|
| 26 |
+
*.egg-info/
|
| 27 |
+
.installed.cfg
|
| 28 |
+
*.egg
|
| 29 |
+
|
| 30 |
+
# IDE
|
| 31 |
+
.idea/
|
| 32 |
+
.vscode/
|
| 33 |
+
*.swp
|
| 34 |
+
*.swo
|
| 35 |
+
|
| 36 |
+
# Logs
|
| 37 |
+
*.log
|
| 38 |
+
logs/
|
| 39 |
+
.chainlit/*
|
| 40 |
+
|
| 41 |
+
**/.DS_Store
|
| 42 |
+
.DS_Store
|
app.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Dict, TypedDict, Union, Annotated
|
| 2 |
+
import chainlit as cl
|
| 3 |
+
from langgraph.graph import StateGraph, END
|
| 4 |
+
from langgraph.graph.message import add_messages
|
| 5 |
+
from langgraph.prebuilt import ToolNode
|
| 6 |
+
from langchain_openai import ChatOpenAI
|
| 7 |
+
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
|
| 8 |
+
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
| 9 |
+
from langchain_core.tools import BaseTool
|
| 10 |
+
|
| 11 |
+
from operator import itemgetter
|
| 12 |
+
from pydantic import BaseModel, Field, ConfigDict
|
| 13 |
+
from saul.tools import tools
|
| 14 |
+
|
| 15 |
+
from loguru import logger
|
| 16 |
+
import json
|
| 17 |
+
from dotenv import load_dotenv
|
| 18 |
+
|
| 19 |
+
load_dotenv()
|
| 20 |
+
|
| 21 |
+
# Types for our nodes
|
| 22 |
+
class AgentState(TypedDict):
    """State for the research agent."""

    # Conversation history passed between graph nodes. The `add_messages`
    # annotation attaches LangGraph's message reducer to this key, so the
    # {"messages": [...]} dict a node returns is combined with the existing
    # history by that reducer (see the LangGraph docs for exact merge rules).
    messages: Annotated[list, add_messages]
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# Initialize the LLM
|
| 28 |
+
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, streaming=True)
|
| 29 |
+
|
| 30 |
+
# bind tools to the llm
|
| 31 |
+
llm = llm.bind_tools(tools)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# Agent node implementation
|
| 35 |
+
async def call_model(state: AgentState) -> Dict:
    """Agent node: ask the tool-bound LLM what to do next.

    Args:
        state: Current graph state; only ``state["messages"]`` is read.

    Returns:
        A partial state update ``{"messages": [response]}`` holding the
        model's reply, which either carries tool calls or the final answer.
    """
    logger.success(f"Calling agent model with state: {state}")
    # This node is a coroutine, so use the async entry point: the blocking
    # `invoke` would hold the event loop for the whole model round-trip and
    # freeze every other chat session served by the same process.
    response = await llm.ainvoke(state["messages"])
    return {"messages": [response]}
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
execute_tool = ToolNode(tools)
|
| 45 |
+
|
| 46 |
+
# Create the graph
|
| 47 |
+
uncompiled_graph = StateGraph(AgentState)
|
| 48 |
+
|
| 49 |
+
# Add nodes
|
| 50 |
+
uncompiled_graph.add_node("agent", call_model)
|
| 51 |
+
uncompiled_graph.add_node("action", execute_tool)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
# conditional edge function
|
| 55 |
+
def should_continue(state):
    """Pick the next graph step after the agent node has run.

    Returns "action" when the newest message carries tool calls, otherwise
    the END sentinel so the graph stops.
    """
    # Only the most recent message decides the route.
    pending_calls = state["messages"][-1].tool_calls
    return "action" if pending_calls else END
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# Add edges
|
| 65 |
+
uncompiled_graph.add_conditional_edges("agent", should_continue)
|
| 66 |
+
uncompiled_graph.add_edge("action", "agent")
|
| 67 |
+
|
| 68 |
+
# Set entry point
|
| 69 |
+
uncompiled_graph.set_entry_point("agent")
|
| 70 |
+
|
| 71 |
+
# Compile the graph
|
| 72 |
+
compiled_graph = uncompiled_graph.compile()
|
| 73 |
+
|
| 74 |
+
system_prompt = """You are a helpful legal research assistant.
|
| 75 |
+
Only answer questions that are related to legal research, else politely decline to answer.
|
| 76 |
+
Only answer the last question.
|
| 77 |
+
"""
|
| 78 |
+
|
| 79 |
+
@cl.on_chat_start
async def start():
    """Open a chat session: seed the agent state and post the welcome card."""
    # The stored history begins with only the system prompt.
    seeded_state = AgentState(messages=[SystemMessage(content=system_prompt)])
    card = cl.Image(path="./better-call-saul-calling-card.jpg")

    # Keep the state in the per-user session so the message handler can read it.
    cl.user_session.set("state", seeded_state)

    welcome_text = """### Better Call Agentic-Saul
🫵 Yeah! I'm Saul, your legal research assistant.
I'm here to help you with your legal research needs.

I will find information from:
- 📄 Legal Glossary - legal terms and definitions
- 📚 Wikipedia - basic information
- 💬 Reddit discussions - current chatter in social media
- 📖 Google Scholar Case Law - judicial opinions from numerous federal and state courts

What would you like to research?"""

    greeting = cl.Message(content=welcome_text, elements=[card])
    await greeting.send()
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
@cl.on_message
async def main(message: cl.Message):
    """Handle one incoming user message.

    Appends the user's turn to the session history, streams the compiled
    graph in "updates" mode, and renders every node update (tool selected,
    tool used, final answer) into a single Chainlit message.

    Args:
        message: The Chainlit message the user just sent.
    """
    # Get current session state
    state_dict = cl.user_session.get("state")
    state = AgentState(**state_dict)

    # Update messages in state
    # NOTE(review): only the user's turns are appended here; the model's
    # replies and tool outputs are not added back to the stored history.
    # Confirm this is intentional before relying on multi-turn context.
    state["messages"].append(HumanMessage(content=message.content))
    inputs = {"messages": state["messages"]}

    msg = cl.Message(content="")
    # Run the graph with current state
    async for chunk in compiled_graph.astream(inputs, stream_mode="updates"):
        for node, values in chunk.items():
            logger.success(f"Receiving update from node: {node}")
            await msg.stream_token(f"Receiving update from node: **{node}**\n")
            if node == "action":
                for tool_msg in values["messages"]:
                    output = f"Tool used: {tool_msg.name}"
                    logger.success(output)
                    await msg.stream_token(f"{output}\n\n")
            else:  # node == "agent"
                if values["messages"][0].tool_calls:
                    tool_names = [tool["name"] for tool in values["messages"][0].tool_calls]
                    output = f"Tool(s) Selected: {', '.join(tool_names)}"
                    logger.success(output)
                    await msg.stream_token(f"{output}\n\n")
                else:
                    output = "\n**Final output**\n"
                    logger.success(output)
                    print(values["messages"][-1].content)
                    await msg.stream_token(f"{output}")
            print("\n\n")

            # stream messages to the UI (content is empty for pure tool-call replies)
            if token := values["messages"][-1].content:
                await msg.stream_token(token)

    # Finalize the streamed message; without this the UI message is left in
    # its streaming state and is never sent as a completed message.
    await msg.send()

    # Update session state
    cl.user_session.set("state", state)
|
| 163 |
+
|
better-call-saul-calling-card.jpg
ADDED
|
chainlit.md
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Welcome to Chainlit! 🚀🤖
|
| 2 |
+
|
| 3 |
+
Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
|
| 4 |
+
|
| 5 |
+
## Useful Links 🔗
|
| 6 |
+
|
| 7 |
+
- **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
|
| 8 |
+
- **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬
|
| 9 |
+
|
| 10 |
+
We can't wait to see what you create with Chainlit! Happy coding! 💻😊
|
| 11 |
+
|
| 12 |
+
## Welcome screen
|
| 13 |
+
|
| 14 |
+
To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
|
pyproject.toml
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "agentic-saul"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "A legal research assistant powered by LangGraph and Chainlit"
|
| 5 |
+
requires-python = ">=3.9,<3.12"
|
| 6 |
+
readme = "README.md"
|
| 7 |
+
license = { text = "MIT" }
|
| 8 |
+
dependencies = [
|
| 9 |
+
"chainlit~=2.0.4",
|
| 10 |
+
"langgraph~=0.2.67",
|
| 11 |
+
"langchain~=0.3.15",
|
| 12 |
+
"langchain-community~=0.3.16",
|
| 13 |
+
"langchain-openai~=0.3.2",
|
| 14 |
+
"wikipedia~=1.4.0",
|
| 15 |
+
"praw~=7.8.1",
|
| 16 |
+
"semanticscholar~=0.9.0",
|
| 17 |
+
"python-dotenv~=1.0.1",
|
| 18 |
+
"websockets>=14.2",
|
| 19 |
+
"google-search-results~=2.4.2",
|
| 20 |
+
"loguru~=0.7.3",
|
| 21 |
+
]
|
| 22 |
+
|
| 23 |
+
[project.optional-dependencies]
|
| 24 |
+
dev = [
|
| 25 |
+
"ruff~=0.3.3",
|
| 26 |
+
"black~=24.2.0",
|
| 27 |
+
"mypy~=1.9.0",
|
| 28 |
+
]
|
| 29 |
+
|
| 30 |
+
[tool.setuptools]
|
| 31 |
+
packages = ["saul"]
|
| 32 |
+
|
| 33 |
+
[tool.ruff]
|
| 34 |
+
select = ["E", "F", "I", "N", "W", "B"]
|
| 35 |
+
line-length = 100
|
| 36 |
+
|
| 37 |
+
[tool.black]
|
| 38 |
+
line-length = 100
|
| 39 |
+
target-version = ["py39"]
|
| 40 |
+
|
| 41 |
+
[tool.mypy]
|
| 42 |
+
python_version = "3.9"
|
| 43 |
+
strict = true
|
| 44 |
+
ignore_missing_imports = true
|
saul/tools.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dotenv import load_dotenv
|
| 2 |
+
import os
|
| 3 |
+
from langchain_community.tools import tool
|
| 4 |
+
|
| 5 |
+
from langchain_community.tools.reddit_search.tool import RedditSearchRun
|
| 6 |
+
from langchain_community.utilities.reddit_search import RedditSearchAPIWrapper
|
| 7 |
+
from langchain_community.tools.semanticscholar.tool import SemanticScholarQueryRun
|
| 8 |
+
from langchain_community.tools import WikipediaQueryRun
|
| 9 |
+
from langchain_community.utilities import WikipediaAPIWrapper
|
| 10 |
+
# from langchain_community.tools.google_scholar import GoogleScholarQueryRun
|
| 11 |
+
# from langchain_community.utilities.google_scholar import GoogleScholarAPIWrapper
|
| 12 |
+
from serpapi import GoogleSearch
|
| 13 |
+
|
| 14 |
+
from loguru import logger
|
| 15 |
+
|
| 16 |
+
load_dotenv()
|
| 17 |
+
|
| 18 |
+
wikipedia_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
|
| 19 |
+
semantic_scholar_tool = SemanticScholarQueryRun()
|
| 20 |
+
# google_scholar_tool = GoogleScholarQueryRun(api_wrapper=GoogleScholarAPIWrapper())
|
| 21 |
+
reddit_tool = RedditSearchRun(
|
| 22 |
+
api_wrapper=RedditSearchAPIWrapper(
|
| 23 |
+
client_id=os.getenv("REDDIT_CLIENT_ID"),
|
| 24 |
+
client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
|
| 25 |
+
user_agent=os.getenv("REDDIT_USER_AGENT")
|
| 26 |
+
)
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
# Google Scholar tool
|
| 30 |
+
@tool
def google_scholar_tool(query: str, top_k: int = 10) -> str:
    """Search Google Scholar Case Law for judicial opinions from numerous federal and state courts in the US about the given query."""
    # NOTE: the docstring above is the description `@tool` exposes to the
    # model, so it is kept verbatim; implementation notes live in comments.
    #
    # query: free-text search string forwarded to SerpApi as "q".
    # top_k: maximum number of results to include in the returned text.
    # Returns one "Title / Snippet / Summary" paragraph per result, separated
    # by blank lines, or a fixed fallback sentence when nothing came back.
    params = {
        "q": query,
        "api_key": os.getenv("SERP_API_KEY"),
        "engine": "google_scholar",
        "hl": "en",
        # presumably restricts the search to case law, matching the tool
        # description — confirm against the SerpApi `as_sdt` documentation
        "as_sdt": "4",
    }
    # Never write the API key to the logs.
    loggable_params = {key: value for key, value in params.items() if key != "api_key"}
    logger.debug(f"Google Scholar params: {loggable_params}")

    response = GoogleSearch(params).get_dict()
    # Surface API-side failures (bad key, quota, empty result set) in the logs
    # instead of letting them look like an ordinary "no results" answer.
    if error := response.get("error"):
        logger.warning(f"Google Scholar search returned an error: {error}")

    results = response.get("organic_results", [])
    logger.debug(f"Google Scholar results: {results}")
    if not results:
        return "No good Google Scholar results found."

    # A negative top_k would silently drop trailing results via slicing.
    limit = max(top_k, 0)

    # Format the results
    formatted_results = [
        f"Title: {result.get('title', '')}\n"
        f"Snippet: {result.get('snippet', '')}\n"
        # `publication_info` may be present but null; fall back to an empty dict.
        f"Summary: {(result.get('publication_info') or {}).get('summary', '')}"
        for result in results[:limit]
    ]
    logger.info(f"Google Scholar results: {formatted_results}")
    return "\n\n".join(formatted_results)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# add "rag" tool
|
| 59 |
+
@tool
def rag_tool():
    """RAG tool."""
    # Placeholder: there is no retrieval logic yet, and its entry in the
    # `tools` list further down this file is commented out.
    pass
|
| 63 |
+
|
| 64 |
+
# Initialize tools
|
| 65 |
+
tools = [
|
| 66 |
+
wikipedia_tool,
|
| 67 |
+
reddit_tool,
|
| 68 |
+
google_scholar_tool,
|
| 69 |
+
# rag_tool()
|
| 70 |
+
]
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|