Spaces:
Sleeping
Sleeping
File size: 4,730 Bytes
764669a 148189b 764669a 2d3748f 9332d1a 45b9751 9332d1a 5cd9fd3 825032d 9332d1a 45b9751 20e8a32 148189b 764669a 2d3748f 45b9751 2d3748f 9332d1a 2d3748f 9332d1a 5cd9fd3 9332d1a 825032d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun
from langchain.tools import Tool
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.document_loaders import WikipediaLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
from langchain_core.tools import tool
import base64
import os
from pydantic import BaseModel, Field
from openai import AzureOpenAI
from langchain_experimental.utilities import PythonREPL
from pathlib import Path
from dotenv import load_dotenv
# Pull settings from a .env file (when one is found) into the process
# environment, so the os.environ lookups further down can see them.
load_dotenv()
# Web search tool backed by DuckDuckGo.
# ``func`` must be a plain callable that takes the query string, so pass the
# search tool's bound ``run`` method rather than the tool object itself:
# calling a LangChain tool instance directly goes through a deprecated
# ``__call__`` code path.
duckduck_tool = Tool(
    name="duckduckgo_search",
    func=DuckDuckGoSearchRun().run,
    description="Searches DuckDuckGo for information from the web.",
)
# Direct Wikipedia lookup: at most one result, with the returned content
# capped at 3000 characters.
wikipedia = WikipediaAPIWrapper(
    top_k_results=1,
    doc_content_chars_max=3000,
)
wikipedia_tool = WikipediaQueryRun(api_wrapper=wikipedia)
# Embedding model shared by the module. Endpoint, key and API version are
# read from the environment; the optional ``dimensions`` argument supported
# by the text-embedding-3 models is left unset.
embeddings = AzureOpenAIEmbeddings(
    model="text-embedding-3-large",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    openai_api_version=os.getenv("OPENAI_API_VERSION"),
)
def wiki_RAG(query: str) -> str:
    """Return Wikipedia passages relevant to ``query``.

    Loads up to five Wikipedia articles for the query, splits them into
    1000-character chunks with a 50-character overlap, indexes the chunks in
    a temporary Chroma collection and retrieves the chunks most similar to
    the query.

    Args:
        query: Free-text query; used both to select the articles and to rank
            the chunks.

    Returns:
        The text of up to eight retrieved chunks joined by newlines, or an
        empty string when no article was loaded for the query.
    """
    import uuid

    docs = WikipediaLoader(query=query, load_max_docs=5).load()
    if not docs:
        # No article was loaded, so there is nothing to index or retrieve.
        return ""

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    doc_splits = text_splitter.split_documents(docs)

    # Use a fresh collection per call. With a fixed name, every call in the
    # same process writes into the same collection (NOTE(review): in-memory
    # Chroma clients appear to share state within a process - confirm), so
    # chunks indexed for earlier queries could be returned for later ones.
    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        collection_name=f"wiki-{uuid.uuid4().hex}",
        embedding=embeddings,
    )
    try:
        retriever = vectorstore.as_retriever(search_kwargs={"k": 8})
        results = retriever.invoke(query)
    finally:
        # The index is only needed for this one query; drop it so repeated
        # calls do not pile up collections.
        vectorstore.delete_collection()

    return "\n".join(doc.page_content for doc in results)
# Tool wrapper that exposes wiki_RAG under the name "wikipedia_search_RAG".
wiki_RAG_tool = Tool(
    func=wiki_RAG,
    name="wikipedia_search_RAG",
    description="Searches information in wikipedia.",
)
# Chat model behind the image analyser.
# Deployment name, key, endpoint and API version are read from the
# environment; the temperature is fixed at 0.
llm_img = AzureChatOpenAI(
    temperature=0,
    deployment_name=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),
    openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    openai_api_version=os.getenv("OPENAI_API_VERSION"),
)
class ImageAnalyserInput(BaseModel):
    # Argument schema for the "image-analyser" tool: a single file path.
    image_path: str = Field(description="path to file")


# The docstring below doubles as the tool description shown to the model, so
# implementation notes live in comments instead.
#
# Reads the file at ``image_path``, base64-encodes it, sends it to ``llm_img``
# together with the prompt "Describe this image" and returns the content of
# the model's reply. Raises whatever ``open`` raises for an unreadable path.
@tool("image-analyser", args_schema=ImageAnalyserInput)
def image_analyser_tool(image_path: str) -> str:
    """Analyzes an image and returns a description."""
    import mimetypes

    # Declare the media type that matches the file instead of always claiming
    # JPEG; fall back to JPEG when the extension is unknown or not an image.
    guessed_type, _ = mimetypes.guess_type(image_path)
    if guessed_type is not None and guessed_type.startswith("image/"):
        mime_type = guessed_type
    else:
        mime_type = "image/jpeg"

    with open(image_path, "rb") as image_file:
        image_data = base64.b64encode(image_file.read()).decode("utf-8")

    message = {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Describe this image",
            },
            {
                "type": "image",
                "source_type": "base64",
                "data": image_data,
                "mime_type": mime_type,
            },
        ],
    }
    response = llm_img.invoke([message])
    return response.content
# Azure OpenAI client used for Whisper audio transcription.
# Key and endpoint are read from the environment; the API version is pinned
# here rather than taken from OPENAI_API_VERSION.
client = AzureOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version="2024-02-01",
)
class AudioTranscriberInput(BaseModel):
    # Argument schema for the "audio-transcriber" tool: a single file path.
    audio_path: str = Field(description="path to audio file")


# The docstring below doubles as the tool description shown to the model, so
# implementation notes live in comments instead.
#
# Sends the file at ``audio_path`` to the 'whisper' model through ``client``
# and returns the transcribed text.
@tool("audio-transcriber", args_schema=AudioTranscriberInput)
def audio_transcriber_tool(audio_path: str) -> str:
    """Receives path to audio file and returns text transcription of the audio recording."""
    # Open the recording in a context manager so the handle is closed even
    # when the request fails.
    with open(audio_path, "rb") as audio_file:
        result = client.audio.transcriptions.create(
            file=audio_file,
            model='whisper',
        )
    # The API returns a transcription object; the tool promises plain text.
    return result.text
class PythonExecutorInput(BaseModel):
    # Argument schema for the "python-executor" tool: a single script path.
    script_path: str = Field(description="path to python script")


# The docstring below doubles as the tool description shown to the model, so
# implementation notes live in comments instead.
#
# SECURITY: this executes whatever code the file contains, in-process and
# with this process's privileges. Only expose it to trusted inputs or run the
# agent inside a sandbox.
@tool("python-executor", args_schema=PythonExecutorInput)
def python_executor_tool(script_path: str) -> str:
    """Receives path to python script, execute the script and return result."""
    # PythonREPL.run executes the *source text* it is given, so load the
    # script first; handing it the path would try to run the path string
    # itself as code.
    source = Path(script_path).read_text(encoding="utf-8")
    python_repl = PythonREPL()
    return python_repl.run(source)
class OpenTextFilesInput(BaseModel):
    # Argument schema for the "python-script-opener" tool: a single file path.
    script_path: str = Field(description="path to python script")


# The docstring below doubles as the tool description shown to the model, so
# implementation notes live in comments instead.
@tool("python-script-opener", args_schema=OpenTextFilesInput)
def python_script_opener(script_path: str) -> str:
    """Receives path to python script, returns the content of the file."""
    # Python source files are UTF-8 by default; decode explicitly so the
    # result does not depend on the platform's locale encoding.
    return Path(script_path).read_text(encoding="utf-8")
|