Upload 80 files
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +131 -0
- Dockerfile +3 -0
- activities/activity_helpers.py +33 -0
- activities/api.py +93 -0
- activities/complete_chat.py +77 -0
- activities/eval.py +207 -0
- activities/local_demo.py +108 -0
- activities/local_test.py +87 -0
- activities/predict.py +41 -0
- activities/vllm_api_server.py +636 -0
- configs/agent_configs/react_agent_azureopenai_gpt_35_turbo_async.yaml +23 -0
- configs/agent_configs/react_agent_azureopenai_gpt_4_async.yaml +23 -0
- configs/agent_configs/react_agent_azureopenai_gpt_4_async_dcoker.yaml +23 -0
- configs/agent_configs/react_agent_gpt4_async.yaml +23 -0
- configs/agent_configs/react_agent_llama_async.yaml +23 -0
- configs/agent_configs/react_agent_opt_async.yaml +23 -0
- configs/tool_configs/async_python_code_sandbox.yaml +7 -0
- configs/tool_configs/async_python_code_sandbox_docker.yaml +7 -0
- run.sh +3 -0
- run_demo.sh +5 -0
- run_local.sh +4 -0
- setup.py +40 -0
- src/infiagent/__init__.py +0 -0
- src/infiagent/agent/__init__.py +2 -0
- src/infiagent/agent/base_agent.py +337 -0
- src/infiagent/agent/react/__init__.py +4 -0
- src/infiagent/agent/react/async_react_agent.py +299 -0
- src/infiagent/conversation_sessions/__init__.py +1 -0
- src/infiagent/conversation_sessions/code_interpreter_session.py +87 -0
- src/infiagent/exceptions/__init__.py +0 -0
- src/infiagent/exceptions/exceptions.py +46 -0
- src/infiagent/llm/__init__.py +5 -0
- src/infiagent/llm/base_llm.py +36 -0
- src/infiagent/llm/client/__init__.py +0 -0
- src/infiagent/llm/client/azure_openai.py +346 -0
- src/infiagent/llm/client/llama.py +377 -0
- src/infiagent/llm/client/openai.py +306 -0
- src/infiagent/llm/client/opt.py +373 -0
- src/infiagent/prompt/__init__.py +3 -0
- src/infiagent/prompt/prompt_template.py +83 -0
- src/infiagent/prompt/simple_react_prompt.py +17 -0
- src/infiagent/prompt/zero_shot_react_prompt.py +36 -0
- src/infiagent/schemas/__init__.py +5 -0
- src/infiagent/schemas/agent_models.py +148 -0
- src/infiagent/schemas/base_models.py +0 -0
- src/infiagent/schemas/complete_models.py +236 -0
- src/infiagent/schemas/llm_models.py +91 -0
- src/infiagent/schemas/sandbox_models.py +69 -0
- src/infiagent/services/__init__.py +0 -0
- src/infiagent/services/chat_complete_service.py +196 -0
.gitignore
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
|
| 28 |
+
# PyInstaller
|
| 29 |
+
# Usually these files are written by a python script from a template
|
| 30 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 31 |
+
*.manifest
|
| 32 |
+
*.spec
|
| 33 |
+
|
| 34 |
+
# Installer logs
|
| 35 |
+
pip-log.txt
|
| 36 |
+
pip-delete-this-directory.txt
|
| 37 |
+
|
| 38 |
+
# Unit test / coverage reports
|
| 39 |
+
htmlcov/
|
| 40 |
+
.tox/
|
| 41 |
+
.nox/
|
| 42 |
+
.coverage
|
| 43 |
+
.coverage.*
|
| 44 |
+
.cache
|
| 45 |
+
nosetests.xml
|
| 46 |
+
coverage.xml
|
| 47 |
+
*.cover
|
| 48 |
+
*.py,cover
|
| 49 |
+
.hypothesis/
|
| 50 |
+
.pytest_cache/
|
| 51 |
+
|
| 52 |
+
# Translations
|
| 53 |
+
*.mo
|
| 54 |
+
*.pot
|
| 55 |
+
|
| 56 |
+
# Django stuff:
|
| 57 |
+
*.log
|
| 58 |
+
local_settings.py
|
| 59 |
+
db.sqlite3
|
| 60 |
+
db.sqlite3-journal
|
| 61 |
+
|
| 62 |
+
# Flask stuff:
|
| 63 |
+
instance/
|
| 64 |
+
.webassets-cache
|
| 65 |
+
|
| 66 |
+
# Scrapy stuff:
|
| 67 |
+
.scrapy
|
| 68 |
+
|
| 69 |
+
# Sphinx documentation
|
| 70 |
+
docs/_build/
|
| 71 |
+
build/doctrees
|
| 72 |
+
build/html
|
| 73 |
+
|
| 74 |
+
# PyBuilder
|
| 75 |
+
target/
|
| 76 |
+
|
| 77 |
+
# Jupyter Notebook
|
| 78 |
+
.ipynb_checkpoints
|
| 79 |
+
|
| 80 |
+
# pyenv
|
| 81 |
+
.python-version
|
| 82 |
+
|
| 83 |
+
# celery beat schedule file
|
| 84 |
+
celerybeat-schedule
|
| 85 |
+
|
| 86 |
+
# SageMath parsed files
|
| 87 |
+
*.sage.py
|
| 88 |
+
|
| 89 |
+
# Environments
|
| 90 |
+
.env
|
| 91 |
+
.venv
|
| 92 |
+
env/
|
| 93 |
+
venv/
|
| 94 |
+
ENV/
|
| 95 |
+
env.bak/
|
| 96 |
+
venv.bak/
|
| 97 |
+
|
| 98 |
+
# Spyder project settings
|
| 99 |
+
.spyderproject
|
| 100 |
+
.spyproject
|
| 101 |
+
|
| 102 |
+
# Rope project settings
|
| 103 |
+
.ropeproject
|
| 104 |
+
|
| 105 |
+
# mkdocs documentation
|
| 106 |
+
/site
|
| 107 |
+
|
| 108 |
+
# mypy
|
| 109 |
+
.mypy_cache/
|
| 110 |
+
.dmypy.json
|
| 111 |
+
dmypy.json
|
| 112 |
+
|
| 113 |
+
# Pyre type checker
|
| 114 |
+
.pyre/
|
| 115 |
+
|
| 116 |
+
# pytype static type analyzer
|
| 117 |
+
.pytype/
|
| 118 |
+
|
| 119 |
+
# Cython debug symbols
|
| 120 |
+
cython_debug/
|
| 121 |
+
|
| 122 |
+
# JetBrains PyCharm specific
|
| 123 |
+
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, GoLand, Rider and Android Studio
|
| 124 |
+
.idea/
|
| 125 |
+
*.iml
|
| 126 |
+
|
| 127 |
+
# User-specific stuff
|
| 128 |
+
*.swp
|
| 129 |
+
*~
|
| 130 |
+
.Session.vim
|
| 131 |
+
/.sass-cache
|
Dockerfile
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3
|
| 2 |
+
|
| 3 |
+
RUN pip install pandas numpy scikit-learn matplotlib seaborn
|
activities/activity_helpers.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
from sse_starlette import ServerSentEvent
|
| 4 |
+
|
| 5 |
+
from infiagent.schemas import ResponseBaseData
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
IGNORE_PING_COMMENT = {"comment": "IGNORE PING"}
|
| 9 |
+
DONE = "[DONE]"
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
async def async_sse_response_format(response_data_gen):
|
| 13 |
+
async for content in response_data_gen:
|
| 14 |
+
if content == DONE:
|
| 15 |
+
sse_event = ServerSentEvent(data=DONE)
|
| 16 |
+
else:
|
| 17 |
+
data_dict = {
|
| 18 |
+
"response": content,
|
| 19 |
+
"ResponseBase": ResponseBaseData().dict()
|
| 20 |
+
}
|
| 21 |
+
sse_event = ServerSentEvent(data=json.dumps(data_dict, ensure_ascii=False))
|
| 22 |
+
yield sse_event
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def json_response_format(content):
|
| 26 |
+
return {
|
| 27 |
+
"response": content,
|
| 28 |
+
"ResponseBase": ResponseBaseData().dict()
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def get_ignore_ping_comment():
|
| 33 |
+
return lambda: ServerSentEvent(**IGNORE_PING_COMMENT)
|
activities/api.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import uuid
|
| 3 |
+
|
| 4 |
+
import uvloop
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
from fastapi import FastAPI, HTTPException, Request
|
| 7 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 8 |
+
from sse_starlette.sse import EventSourceResponse, ServerSentEvent
|
| 9 |
+
from starlette.responses import JSONResponse, Response
|
| 10 |
+
|
| 11 |
+
from .activity_helpers import DONE
|
| 12 |
+
from .complete_chat import complete_chat_router
|
| 13 |
+
from .predict import predict_router
|
| 14 |
+
|
| 15 |
+
try:
|
| 16 |
+
import infiagent
|
| 17 |
+
from infiagent.schemas import FailedResponseBaseData
|
| 18 |
+
from infiagent.utils import get_logger, init_logging, log_id_var
|
| 19 |
+
except ImportError:
|
| 20 |
+
print("import infiagent failed, please install infiagent by 'pip install .' in the pipeline directory of ADA-Agent")
|
| 21 |
+
from ..schemas import FailedResponseBaseData
|
| 22 |
+
from ..utils import get_logger, init_logging, log_id_var
|
| 23 |
+
|
| 24 |
+
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
|
| 25 |
+
|
| 26 |
+
SSE_API_PATHS = ["/complete_sse"]
|
| 27 |
+
LOG_ID_HEADER_NAME = "X-Tt-Logid"
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
load_dotenv()
|
| 31 |
+
init_logging()
|
| 32 |
+
logger = get_logger()
|
| 33 |
+
|
| 34 |
+
app = FastAPI()
|
| 35 |
+
origins = ["*"]
|
| 36 |
+
app.add_middleware(
|
| 37 |
+
CORSMiddleware,
|
| 38 |
+
allow_origins=origins,
|
| 39 |
+
allow_credentials=True,
|
| 40 |
+
allow_methods=["*"],
|
| 41 |
+
allow_headers=["*"],
|
| 42 |
+
)
|
| 43 |
+
app.include_router(complete_chat_router)
|
| 44 |
+
app.include_router(predict_router)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
@app.middleware("http")
|
| 48 |
+
async def log_id_middleware(request: Request, call_next):
|
| 49 |
+
# Get X-Tt-Logid from request headers
|
| 50 |
+
log_id = request.headers.get(LOG_ID_HEADER_NAME)
|
| 51 |
+
if not log_id:
|
| 52 |
+
# Generate a log_id if not present in headers
|
| 53 |
+
log_id = str(uuid.uuid4())
|
| 54 |
+
|
| 55 |
+
log_id_var.set(log_id)
|
| 56 |
+
|
| 57 |
+
response: Response = await call_next(request)
|
| 58 |
+
response.headers[LOG_ID_HEADER_NAME] = log_id_var.get()
|
| 59 |
+
return response
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
@app.exception_handler(Exception)
|
| 63 |
+
async def general_exception_handler(request, exc):
|
| 64 |
+
error_msg = "Failed to handle request. Internal Server error: {}".format(str(exc))
|
| 65 |
+
logger.error(error_msg, exc_info=True)
|
| 66 |
+
|
| 67 |
+
if request.url.path in SSE_API_PATHS:
|
| 68 |
+
return EventSourceResponse(ServerSentEvent(data=DONE))
|
| 69 |
+
else:
|
| 70 |
+
return JSONResponse(
|
| 71 |
+
status_code=500,
|
| 72 |
+
content={
|
| 73 |
+
"response": error_msg,
|
| 74 |
+
"ResponseBase": FailedResponseBaseData().dict()
|
| 75 |
+
}
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
@app.exception_handler(HTTPException)
|
| 80 |
+
async def http_exception_handler(request, exc):
|
| 81 |
+
error_msg = "Failed to handle request. Error: {}".format(exc.detail)
|
| 82 |
+
logger.error(error_msg, exc_info=True)
|
| 83 |
+
|
| 84 |
+
if request.url.path in SSE_API_PATHS:
|
| 85 |
+
return EventSourceResponse(ServerSentEvent(data=DONE))
|
| 86 |
+
else:
|
| 87 |
+
return JSONResponse(
|
| 88 |
+
status_code=exc.status_code,
|
| 89 |
+
content={
|
| 90 |
+
"response": error_msg,
|
| 91 |
+
"ResponseBase": FailedResponseBaseData().dict()
|
| 92 |
+
}
|
| 93 |
+
)
|
activities/complete_chat.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, Request, HTTPException
|
| 2 |
+
from pydantic import ValidationError
|
| 3 |
+
from sse_starlette import EventSourceResponse, ServerSentEvent
|
| 4 |
+
|
| 5 |
+
from .activity_helpers import async_sse_response_format, IGNORE_PING_COMMENT, json_response_format
|
| 6 |
+
|
| 7 |
+
try:
|
| 8 |
+
import infiagent
|
| 9 |
+
from infiagent.db.conversation_dao import ConversationDAO
|
| 10 |
+
from infiagent.schemas import ChatCompleteRequest
|
| 11 |
+
from infiagent.services.chat_complete_sse_service import chat_event_generator, chat_event_response
|
| 12 |
+
from infiagent.tools.code_sandbox.async_sandbox_client import AsyncSandboxClient
|
| 13 |
+
from infiagent.utils import get_logger
|
| 14 |
+
except ImportError:
|
| 15 |
+
print("import infiagent failed, please install infiagent by 'pip install .' in the pipeline directory of ADA-Agent")
|
| 16 |
+
from ..db.conversation_dao import ConversationDAO
|
| 17 |
+
from ..schemas import ChatCompleteRequest
|
| 18 |
+
from ..services.chat_complete_sse_service import chat_event_generator, chat_event_response
|
| 19 |
+
from ..tools.code_sandbox.async_sandbox_client import AsyncSandboxClient
|
| 20 |
+
from ..utils import get_logger
|
| 21 |
+
|
| 22 |
+
complete_chat_router = APIRouter()
|
| 23 |
+
logger = get_logger()
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@complete_chat_router.post("/complete_sse")
|
| 27 |
+
async def complete_sse(request: Request):
|
| 28 |
+
body_str = await request.body()
|
| 29 |
+
|
| 30 |
+
try:
|
| 31 |
+
chat_request = ChatCompleteRequest.parse_raw(body_str)
|
| 32 |
+
logger.info("Got chat request: {}".format(chat_request))
|
| 33 |
+
except ValidationError as e:
|
| 34 |
+
error_msg = "Invalid input chat_request. Error: {}".format(str(e))
|
| 35 |
+
raise HTTPException(status_code=400, detail=error_msg)
|
| 36 |
+
|
| 37 |
+
return EventSourceResponse(async_sse_response_format(chat_event_generator(chat_request)),
|
| 38 |
+
ping_message_factory=lambda: ServerSentEvent(**IGNORE_PING_COMMENT))
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
@complete_chat_router.post("/complete")
|
| 42 |
+
async def complete(request: Request):
|
| 43 |
+
body_str = await request.body()
|
| 44 |
+
|
| 45 |
+
try:
|
| 46 |
+
chat_request = ChatCompleteRequest.parse_raw(body_str)
|
| 47 |
+
logger.info("Got chat request: {}".format(chat_request))
|
| 48 |
+
except ValidationError as e:
|
| 49 |
+
error_msg = "Invalid input chat_request. Error: {}".format(str(e))
|
| 50 |
+
raise HTTPException(status_code=400, detail=error_msg)
|
| 51 |
+
|
| 52 |
+
response_items = await chat_event_response(chat_request)
|
| 53 |
+
|
| 54 |
+
return json_response_format(response_items)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
@complete_chat_router.get("/heartbeat")
|
| 58 |
+
async def heartbeat(chat_id: str = None, session_id: str = None):
|
| 59 |
+
if not chat_id and not session_id:
|
| 60 |
+
raise HTTPException(status_code=400, detail="Either chat_id or session_id must be provided.")
|
| 61 |
+
|
| 62 |
+
input_chat_id = chat_id or session_id
|
| 63 |
+
|
| 64 |
+
conversation = await ConversationDAO.get_conversation(input_chat_id)
|
| 65 |
+
if not conversation:
|
| 66 |
+
logger.info(f'Call heartbeat on a non-exist conversion, {input_chat_id}')
|
| 67 |
+
return json_response_format("conversation is not created, skip")
|
| 68 |
+
|
| 69 |
+
if conversation.sandbox_id is None:
|
| 70 |
+
logger.error(f'No sandbox id for heartbeat, chat id {input_chat_id}')
|
| 71 |
+
raise HTTPException(status_code=404, detail=f'No sandbox id for heartbeat, chat id {input_chat_id}')
|
| 72 |
+
|
| 73 |
+
# TODO Add exception handling logic here for heartbeat failed in sandbox side
|
| 74 |
+
heartbeat_response = await AsyncSandboxClient(conversation.sandbox_id).heartbeat()
|
| 75 |
+
logger.info(f"Heartbeat response {heartbeat_response}")
|
| 76 |
+
|
| 77 |
+
return json_response_format("succeed")
|
activities/eval.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
import argparse
|
| 4 |
+
import asyncio
|
| 5 |
+
import logging
|
| 6 |
+
import sys
|
| 7 |
+
import json
|
| 8 |
+
import io
|
| 9 |
+
|
| 10 |
+
import openai
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
import infiagent
|
| 14 |
+
from infiagent.utils import get_logger, upload_files, get_file_name_and_path
|
| 15 |
+
from infiagent.services.chat_complete_service import predict
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
logger = get_logger()
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class UploadedFile(io.BytesIO):
|
| 22 |
+
def __init__(self, path):
|
| 23 |
+
with open(path, 'rb') as file:
|
| 24 |
+
data = file.read()
|
| 25 |
+
|
| 26 |
+
super().__init__(data)
|
| 27 |
+
|
| 28 |
+
self.name = path.split("/")[-1] # 获取文件名
|
| 29 |
+
self.type = 'application/octet-stream' # 或者其他适当的 MIME 类型
|
| 30 |
+
self.size = len(data)
|
| 31 |
+
|
| 32 |
+
def __repr__(self):
|
| 33 |
+
return f"MyUploadedFile(name={self.name}, size={self.size}, type={self.type})"
|
| 34 |
+
|
| 35 |
+
def __len__(self):
|
| 36 |
+
|
| 37 |
+
return self.size
|
| 38 |
+
|
| 39 |
+
# # 使用例子
|
| 40 |
+
# file_path = "path/to/your/file"
|
| 41 |
+
# uploaded_file = MyUploadedFile(file_path)
|
| 42 |
+
|
| 43 |
+
# print(uploaded_file)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _get_script_params():
|
| 47 |
+
try:
|
| 48 |
+
parser = argparse.ArgumentParser()
|
| 49 |
+
parser.add_argument('--llm',
|
| 50 |
+
help='LLM Model for demo',
|
| 51 |
+
required=False, type=str)
|
| 52 |
+
parser.add_argument('--api_key',
|
| 53 |
+
help='Open API token key.',
|
| 54 |
+
required=False, type=str)
|
| 55 |
+
|
| 56 |
+
parser.add_argument('--config_path',
|
| 57 |
+
help='Config path for demo',
|
| 58 |
+
default="configs/agent_configs/react_agent_llama_async.yaml",
|
| 59 |
+
required=False, type=str)
|
| 60 |
+
|
| 61 |
+
args = parser.parse_args()
|
| 62 |
+
|
| 63 |
+
return args
|
| 64 |
+
except Exception as e:
|
| 65 |
+
logger.error("Failed to get script input arguments: {}".format(str(e)), exc_info=True)
|
| 66 |
+
|
| 67 |
+
return None
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def extract_questions_and_concepts(file_path):
|
| 71 |
+
# Read the content of the text file
|
| 72 |
+
with open(file_path, 'r') as file:
|
| 73 |
+
content = file.read()
|
| 74 |
+
|
| 75 |
+
# Use regular expressions to extract questions and concepts
|
| 76 |
+
pattern = r'\\Question{(.*?)}\s*\\Concepts{(.*?)}'
|
| 77 |
+
matches = re.findall(pattern, content, re.DOTALL)
|
| 78 |
+
|
| 79 |
+
# Build a list of dictionaries containing the questions and concepts
|
| 80 |
+
data = []
|
| 81 |
+
for match in matches:
|
| 82 |
+
question = match[0].strip()
|
| 83 |
+
concepts = [concept.strip() for concept in match[1].split(',')]
|
| 84 |
+
data.append({
|
| 85 |
+
'question': question,
|
| 86 |
+
'concepts': concepts
|
| 87 |
+
})
|
| 88 |
+
|
| 89 |
+
return data
|
| 90 |
+
|
| 91 |
+
def read_dicts_from_file(file_name):
|
| 92 |
+
"""
|
| 93 |
+
Read a file with each line containing a JSON string representing a dictionary,
|
| 94 |
+
and return a list of dictionaries.
|
| 95 |
+
|
| 96 |
+
:param file_name: Name of the file to read from.
|
| 97 |
+
:return: List of dictionaries.
|
| 98 |
+
"""
|
| 99 |
+
dict_list = []
|
| 100 |
+
with open(file_name, 'r') as file:
|
| 101 |
+
for line in file:
|
| 102 |
+
# Convert the JSON string back to a dictionary.
|
| 103 |
+
dictionary = json.loads(line.rstrip('\n'))
|
| 104 |
+
dict_list.append(dictionary)
|
| 105 |
+
return dict_list
|
| 106 |
+
|
| 107 |
+
def read_questions(file_path):
|
| 108 |
+
print(file_path)
|
| 109 |
+
with open(file_path) as f:
|
| 110 |
+
questions = json.load(f)
|
| 111 |
+
|
| 112 |
+
return questions
|
| 113 |
+
|
| 114 |
+
def extract_data_from_folder(folder_path):
|
| 115 |
+
|
| 116 |
+
print(f'folder_path {folder_path}')
|
| 117 |
+
extracted_data = {}
|
| 118 |
+
# Traverse the files in the folder
|
| 119 |
+
for file_name in os.listdir(folder_path):
|
| 120 |
+
if file_name.endswith('.questions'): # You can filter files based on their type
|
| 121 |
+
file_path = os.path.join(folder_path, file_name)
|
| 122 |
+
file_data = read_questions(file_path)
|
| 123 |
+
file_name_without_extension = os.path.splitext(file_name)[0]
|
| 124 |
+
extracted_data[file_name_without_extension] = file_data
|
| 125 |
+
|
| 126 |
+
return extracted_data
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
async def main():
|
| 130 |
+
extracted_data = read_dicts_from_file('./data/da-dev-questions.jsonl')
|
| 131 |
+
args = _get_script_params()
|
| 132 |
+
|
| 133 |
+
model_name = getattr(args, "llm", None)
|
| 134 |
+
open_ai_key = getattr(args, "api_key", None)
|
| 135 |
+
|
| 136 |
+
if "OPEN_AI" in model_name:
|
| 137 |
+
logger.info("setup open ai ")
|
| 138 |
+
if os.environ.get("OPENAI_API_KEY") is None:
|
| 139 |
+
if open_ai_key:
|
| 140 |
+
openai.api_key = open_ai_key
|
| 141 |
+
os.environ["OPENAI_API_KEY"] = open_ai_key
|
| 142 |
+
else:
|
| 143 |
+
raise ValueError("OPENAI_API_KEY is None, please provide open ai key to use open ai model. Adding "
|
| 144 |
+
"'--api_key' to set it up")
|
| 145 |
+
|
| 146 |
+
# 获取 'openai' 的 logger
|
| 147 |
+
openai_logger = logging.getLogger('openai')
|
| 148 |
+
# 设置日志级别为 'WARNING',这样 'INFO' 级别的日志就不会被打印了
|
| 149 |
+
openai_logger.setLevel(logging.WARNING)
|
| 150 |
+
else:
|
| 151 |
+
logger.info("use local model ")
|
| 152 |
+
|
| 153 |
+
table_path = 'data/da-dev-tables'
|
| 154 |
+
results = []
|
| 155 |
+
|
| 156 |
+
i = 1
|
| 157 |
+
for q in extracted_data:
|
| 158 |
+
input_text = q['question']
|
| 159 |
+
concepts = q['concepts']
|
| 160 |
+
file_path = q['file_name']
|
| 161 |
+
constraints = q['constraints']
|
| 162 |
+
format = q['format']
|
| 163 |
+
|
| 164 |
+
file_path = os.path.join(table_path, file_path)
|
| 165 |
+
|
| 166 |
+
print(f'input_text: {input_text}')
|
| 167 |
+
print(f'concepts: {concepts}')
|
| 168 |
+
print(f'file_path: {file_path}')
|
| 169 |
+
|
| 170 |
+
uploaded_file = UploadedFile(file_path)
|
| 171 |
+
print(uploaded_file)
|
| 172 |
+
|
| 173 |
+
prompt = f"Question: {input_text}\n{constraints}\n"
|
| 174 |
+
|
| 175 |
+
response = await predict(
|
| 176 |
+
prompt=prompt,
|
| 177 |
+
model_name=model_name,
|
| 178 |
+
config_path=args.config_path,
|
| 179 |
+
uploaded_files=[uploaded_file]
|
| 180 |
+
)
|
| 181 |
+
|
| 182 |
+
iteration_result = {
|
| 183 |
+
'id': q['id'],
|
| 184 |
+
'input_text': prompt,
|
| 185 |
+
'concepts': concepts,
|
| 186 |
+
'file_path': file_path,
|
| 187 |
+
'response': response,
|
| 188 |
+
'format': format
|
| 189 |
+
}
|
| 190 |
+
results.append(iteration_result)
|
| 191 |
+
print(f"response: {response}")
|
| 192 |
+
|
| 193 |
+
if i % 10 == 0:
|
| 194 |
+
with open('results_{}.json'.format(model_name), 'w') as outfile:
|
| 195 |
+
json.dump(results, outfile, indent=4)
|
| 196 |
+
|
| 197 |
+
i += 1
|
| 198 |
+
|
| 199 |
+
with open('results_{}.json'.format(model_name), 'w') as outfile:
|
| 200 |
+
json.dump(results, outfile, indent=4)
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
if __name__ == '__main__':
|
| 204 |
+
asyncio.run(main())
|
| 205 |
+
# main()
|
| 206 |
+
|
| 207 |
+
|
activities/local_demo.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import asyncio
|
| 3 |
+
import logging
|
| 4 |
+
import os
|
| 5 |
+
import sys
|
| 6 |
+
|
| 7 |
+
import streamlit as st # type: ignore
|
| 8 |
+
import uvloop
|
| 9 |
+
import openai
|
| 10 |
+
|
| 11 |
+
try:
|
| 12 |
+
import infiagent
|
| 13 |
+
from infiagent.utils import get_logger, upload_files
|
| 14 |
+
from infiagent.services.chat_complete_service import predict
|
| 15 |
+
except ImportError:
|
| 16 |
+
raise (
|
| 17 |
+
"import infiagent failed, please install infiagent by 'pip install -e .' in the pipeline directory of ADA-Agent")
|
| 18 |
+
|
| 19 |
+
logger = get_logger()
|
| 20 |
+
|
| 21 |
+
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def _get_script_params():
|
| 25 |
+
try:
|
| 26 |
+
parser = argparse.ArgumentParser()
|
| 27 |
+
parser.add_argument('--llm',
|
| 28 |
+
help='LLM Model for demo',
|
| 29 |
+
required=False, type=str)
|
| 30 |
+
parser.add_argument('--api_key',
|
| 31 |
+
help='Open API token key.',
|
| 32 |
+
required=False, type=str)
|
| 33 |
+
parser.add_argument('--config_path',
|
| 34 |
+
help='Config path for demo',
|
| 35 |
+
# default="configs/agent_configs/react_agent_gpt4_async.yaml",
|
| 36 |
+
required=False, type=str)
|
| 37 |
+
|
| 38 |
+
args = parser.parse_args()
|
| 39 |
+
|
| 40 |
+
return args
|
| 41 |
+
except Exception as e:
|
| 42 |
+
logger.error("Failed to get script input arguments: {}".format(str(e)), exc_info=True)
|
| 43 |
+
|
| 44 |
+
return None
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
async def main():
|
| 48 |
+
args = _get_script_params()
|
| 49 |
+
|
| 50 |
+
model_name = getattr(args, "llm", None)
|
| 51 |
+
open_ai_key = getattr(args, "api_key", None)
|
| 52 |
+
config_path = getattr(args, "config_path", None)
|
| 53 |
+
|
| 54 |
+
if "OPEN_AI" in model_name:
|
| 55 |
+
logger.info("setup open ai ")
|
| 56 |
+
if os.environ.get("OPENAI_API_KEY") is None:
|
| 57 |
+
if open_ai_key:
|
| 58 |
+
openai.api_key = open_ai_key
|
| 59 |
+
os.environ["OPENAI_API_KEY"] = open_ai_key
|
| 60 |
+
else:
|
| 61 |
+
raise ValueError(
|
| 62 |
+
"OPENAI_API_KEY is None, please provide opekn ai key to use open ai model. Adding '--api_key' to set it up")
|
| 63 |
+
|
| 64 |
+
# 获取 'openai' 的 logger
|
| 65 |
+
openai_logger = logging.getLogger('openai')
|
| 66 |
+
# 设置日志级别为 'WARNING',这样 'INFO' 级别的日志就不会被打印了
|
| 67 |
+
openai_logger.setLevel(logging.WARNING)
|
| 68 |
+
|
| 69 |
+
else:
|
| 70 |
+
logger.info("use local model ")
|
| 71 |
+
|
| 72 |
+
st.set_page_config(layout="centered")
|
| 73 |
+
|
| 74 |
+
st.title("InfiAgent Code Interpreter Demo 🚀")
|
| 75 |
+
|
| 76 |
+
# Initialize session state variables if not already present
|
| 77 |
+
if 'chat_history' not in st.session_state:
|
| 78 |
+
st.session_state.chat_history = []
|
| 79 |
+
|
| 80 |
+
# UI components
|
| 81 |
+
input_text = st.text_area("Write your prompt")
|
| 82 |
+
uploaded_files = st.file_uploader("Upload your files", accept_multiple_files=True)
|
| 83 |
+
button_pressed = st.button("Run code interpreter", use_container_width=True)
|
| 84 |
+
|
| 85 |
+
# When button is pressed
|
| 86 |
+
if button_pressed and input_text != "":
|
| 87 |
+
# Add user message to chat history
|
| 88 |
+
st.session_state.chat_history.append({"role": "user", "message": input_text})
|
| 89 |
+
|
| 90 |
+
# Predict response (assuming you have the necessary async handling)
|
| 91 |
+
response = await predict(
|
| 92 |
+
prompt=input_text,
|
| 93 |
+
model_name=model_name,
|
| 94 |
+
config_path=config_path,
|
| 95 |
+
uploaded_files=uploaded_files,
|
| 96 |
+
)
|
| 97 |
+
|
| 98 |
+
# Add assistant message to chat history
|
| 99 |
+
st.session_state.chat_history.append({"role": "assistant", "message": response})
|
| 100 |
+
|
| 101 |
+
# Display chat history
|
| 102 |
+
for chat in st.session_state.chat_history:
|
| 103 |
+
with st.chat_message(chat["role"]):
|
| 104 |
+
st.write(chat["message"])
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
if __name__ == "__main__":
|
| 108 |
+
asyncio.run(main())
|
activities/local_test.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
from fastapi import FastAPI, HTTPException, Request
|
| 4 |
+
from pydantic import ValidationError
|
| 5 |
+
from sse_starlette import EventSourceResponse
|
| 6 |
+
|
| 7 |
+
from .activity_helpers import (
|
| 8 |
+
async_sse_response_format,
|
| 9 |
+
get_ignore_ping_comment,
|
| 10 |
+
json_response_format,
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
try:
|
| 15 |
+
import infiagent
|
| 16 |
+
from infiagent.schemas import ChatCompleteRequest
|
| 17 |
+
from infiagent.services.complete_local_test import (
|
| 18 |
+
chat_local_event,
|
| 19 |
+
chat_local_event_generator,
|
| 20 |
+
)
|
| 21 |
+
from infiagent.utils import get_logger
|
| 22 |
+
except ImportError:
|
| 23 |
+
print("import infiagent failed, please install infiagent by 'pip install .' in the pipeline directory of ADA-Agent")
|
| 24 |
+
from ..schemas import ChatCompleteRequest
|
| 25 |
+
from ..services.complete_local_test import (
|
| 26 |
+
chat_local_event,
|
| 27 |
+
chat_local_event_generator,
|
| 28 |
+
)
|
| 29 |
+
from ..utils import get_logger
|
| 30 |
+
|
| 31 |
+
logger = get_logger()
|
| 32 |
+
local_app = FastAPI()
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
@local_app.post("/local_sse_test")
|
| 36 |
+
async def complete_sse(request: Request):
|
| 37 |
+
body_str = await request.body()
|
| 38 |
+
|
| 39 |
+
try:
|
| 40 |
+
chat_request = ChatCompleteRequest.parse_raw(body_str)
|
| 41 |
+
logger.info("Got chat request: {}".format(chat_request))
|
| 42 |
+
except ValidationError as e:
|
| 43 |
+
error_msg = "Invalid input chat_request. Error: {}".format(str(e))
|
| 44 |
+
raise HTTPException(status_code=500, detail=error_msg)
|
| 45 |
+
|
| 46 |
+
return EventSourceResponse(async_sse_response_format(chat_local_event_generator(chat_request)),
|
| 47 |
+
ping_message_factory=get_ignore_ping_comment())
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
@local_app.post("/local_json_test")
async def complete_json(request: Request):
    """Local JSON test endpoint.

    Parses the raw body into a ChatCompleteRequest, runs the chat to
    completion, and returns all resulting items as one JSON response.

    Raises:
        HTTPException: 500 with the validation detail when the body does not
            parse as a ChatCompleteRequest.
    """
    raw_body = await request.body()

    try:
        chat_request = ChatCompleteRequest.parse_raw(raw_body)
    except ValidationError as e:
        raise HTTPException(
            status_code=500,
            detail="Invalid input chat_request. Error: {}".format(str(e)),
        )
    logger.info("Got chat request: {}".format(chat_request))

    response_items = await chat_local_event(chat_request)
    return json_response_format(response_items)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
@local_app.post("/exception_test")
async def complete_exception_sse(request: Request):
    """SSE endpoint used to exercise exception handling (no ping suppression).

    Renamed from ``complete_json``: the original definition reused the name of
    the ``/local_json_test`` handler, silently rebinding it at module level.
    The HTTP route is unchanged, so clients are unaffected.

    Raises:
        HTTPException: 500 with the validation detail when the body does not
            parse as a ChatCompleteRequest.
    """
    body_str = await request.body()

    try:
        chat_request = ChatCompleteRequest.parse_raw(body_str)
        logger.info("Got chat request: {}".format(chat_request))
    except ValidationError as e:
        error_msg = "Invalid input chat_request. Error: {}".format(str(e))
        raise HTTPException(status_code=500, detail=error_msg)
    return EventSourceResponse(
        async_sse_response_format(chat_local_event_generator(chat_request)))
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
async def exception_test(request: Request):
    """Async generator used to simulate a failure mid-stream.

    Reads a JSON body; if it contains a truthy "exception" key, raises to let
    callers verify error propagation, otherwise yields a single success event.

    Fixes: the original did ``yield iter(["Success"])``, emitting an iterator
    object instead of the event string, and misspelled "triggered" in the
    error message.
    """
    body_str = await request.body()
    json_val = json.loads(body_str)
    exception_type = json_val.get("exception", None)

    if exception_type:
        raise ValueError("Error triggered!")
    else:
        # Yield the event payload itself, not an iterator over it.
        yield "Success"
|
activities/predict.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, File, Form, UploadFile
|
| 2 |
+
from typing import List, Optional
|
| 3 |
+
|
| 4 |
+
try:
|
| 5 |
+
import infiagent
|
| 6 |
+
from infiagent.services.chat_complete_service import predict
|
| 7 |
+
except ImportError:
|
| 8 |
+
print("import infiagent failed, please install infiagent by 'pip install .' in the pipeline directory of ADA-Agent")
|
| 9 |
+
from ..services.chat_complete_service import predict
|
| 10 |
+
|
| 11 |
+
predict_router = APIRouter()
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@predict_router.post("/predict")
async def chat_predict(
        prompt: str = Form(...),
        model_name: str = Form(...),
        psm: Optional[str] = Form(None),
        dc: Optional[str] = Form(None),
        temperature: Optional[str] = Form(None),
        top_p: Optional[str] = Form(None),
        top_k: Optional[str] = Form(None),
        files: List[UploadFile] = File(...)
):
    """Run a one-shot prediction over the uploaded files.

    Form fields arrive as strings; optional sampling parameters are only
    forwarded when provided. Returns ``{"answer": <model response>}``.
    """
    kwargs = {}
    if psm:
        kwargs['psm'] = psm
    if dc:
        kwargs['dc'] = dc
    if temperature:
        kwargs['temperature'] = float(temperature)
    if top_p:
        kwargs['top_p'] = float(top_p)
    if top_k:
        # top_k counts candidate tokens and must be an int; the original
        # passed a float, which integer-typed sampling backends reject.
        # float() first so inputs like "40.0" keep working.
        kwargs['top_k'] = int(float(top_k))

    response = await predict(prompt, model_name, files, **kwargs)

    return {
        "answer": response
    }
|
activities/vllm_api_server.py
ADDED
|
@@ -0,0 +1,636 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Adapted from
|
| 2 |
+
# https://github.com/lm-sys/FastChat/blob/168ccc29d3f7edc50823016105c024fe2282732a/fastchat/serve/openai_api_server.py
|
| 3 |
+
|
| 4 |
+
import argparse
|
| 5 |
+
import asyncio
|
| 6 |
+
import json
|
| 7 |
+
import time
|
| 8 |
+
from http import HTTPStatus
|
| 9 |
+
from typing import AsyncGenerator, Dict, List, Optional, Tuple, Union
|
| 10 |
+
|
| 11 |
+
import fastapi
|
| 12 |
+
import uvicorn
|
| 13 |
+
from fastapi import Request
|
| 14 |
+
from fastapi.exceptions import RequestValidationError
|
| 15 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 16 |
+
from fastapi.responses import JSONResponse, StreamingResponse, Response
|
| 17 |
+
from packaging import version
|
| 18 |
+
|
| 19 |
+
from vllm.engine.arg_utils import AsyncEngineArgs
|
| 20 |
+
from vllm.engine.async_llm_engine import AsyncLLMEngine
|
| 21 |
+
from vllm.entrypoints.openai.protocol import (
|
| 22 |
+
CompletionRequest, CompletionResponse, CompletionResponseChoice,
|
| 23 |
+
CompletionResponseStreamChoice, CompletionStreamResponse,
|
| 24 |
+
ChatCompletionRequest, ChatCompletionResponse,
|
| 25 |
+
ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice,
|
| 26 |
+
ChatCompletionStreamResponse, ChatMessage, DeltaMessage, ErrorResponse,
|
| 27 |
+
LogProbs, ModelCard, ModelList, ModelPermission, UsageInfo)
|
| 28 |
+
from vllm.logger import init_logger
|
| 29 |
+
from vllm.outputs import RequestOutput
|
| 30 |
+
from vllm.sampling_params import SamplingParams
|
| 31 |
+
from vllm.transformers_utils.tokenizer import get_tokenizer
|
| 32 |
+
from vllm.utils import random_uuid
|
| 33 |
+
|
| 34 |
+
try:
|
| 35 |
+
import fastchat
|
| 36 |
+
from fastchat.conversation import Conversation, SeparatorStyle
|
| 37 |
+
from fastchat.model.model_adapter import get_conversation_template
|
| 38 |
+
_fastchat_available = True
|
| 39 |
+
except ImportError:
|
| 40 |
+
_fastchat_available = False
|
| 41 |
+
|
| 42 |
+
TIMEOUT_KEEP_ALIVE = 5  # seconds

logger = init_logger(__name__)
# Populated in the __main__ block from CLI args before the server starts.
served_model = None
app = fastapi.FastAPI()
# AsyncLLMEngine instance; also created in the __main__ block.
engine = None
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def create_error_response(status_code: HTTPStatus,
                          message: str) -> JSONResponse:
    """Wrap *message* in an OpenAI-style error body with the given status."""
    error = ErrorResponse(message=message, type="invalid_request_error")
    return JSONResponse(error.dict(), status_code=status_code.value)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request, exc):  # pylint: disable=unused-argument
    """Translate FastAPI validation failures into OpenAI-style 400 errors."""
    detail = str(exc)
    return create_error_response(HTTPStatus.BAD_REQUEST, detail)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
async def check_model(request) -> Optional[JSONResponse]:
    """Return None if *request.model* is the served model, else a 404 response."""
    if request.model != served_model:
        return create_error_response(
            HTTPStatus.NOT_FOUND,
            f"The model `{request.model}` does not exist.",
        )
    return None
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
async def get_gen_prompt(request) -> str:
    """Render a chat request into a single prompt string.

    Uses fastchat's conversation template for ``request.model`` to join the
    system/user/assistant messages with the model-specific separators. If
    ``request.messages`` is already a plain string it is used verbatim.

    Raises:
        ModuleNotFoundError: if fastchat is not installed.
        ImportError: if the installed fastchat is older than 0.2.23.
        ValueError: if a message carries an unknown role.
    """
    if not _fastchat_available:
        raise ModuleNotFoundError(
            "fastchat is not installed. Please install fastchat to use "
            "the chat completion and conversation APIs: `$ pip install fschat`"
        )
    # The Conversation fields accessed below require fastchat >= 0.2.23.
    if version.parse(fastchat.__version__) < version.parse("0.2.23"):
        raise ImportError(
            f"fastchat version is low. Current version: {fastchat.__version__} "
            "Please upgrade fastchat to use: `$ pip install -U fschat`")

    conv = get_conversation_template(request.model)
    # Rebuild a fresh Conversation so appending messages below does not
    # mutate the shared template returned by get_conversation_template.
    conv = Conversation(
        name=conv.name,
        system_template=conv.system_template,
        system_message=conv.system_message,
        roles=conv.roles,
        messages=list(conv.messages),  # prevent in-place modification
        offset=conv.offset,
        sep_style=SeparatorStyle(conv.sep_style),
        sep=conv.sep,
        sep2=conv.sep2,
        stop_str=conv.stop_str,
        stop_token_ids=conv.stop_token_ids,
    )

    if isinstance(request.messages, str):
        prompt = request.messages
    else:
        for message in request.messages:
            msg_role = message["role"]
            if msg_role == "system":
                conv.system_message = message["content"]
            elif msg_role == "user":
                conv.append_message(conv.roles[0], message["content"])
            elif msg_role == "assistant":
                conv.append_message(conv.roles[1], message["content"])
            else:
                raise ValueError(f"Unknown role: {msg_role}")

        # Add a blank message for the assistant.
        conv.append_message(conv.roles[1], None)
        prompt = conv.get_prompt()

    return prompt
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
async def check_length(
    request: Union[ChatCompletionRequest, CompletionRequest],
    prompt: Optional[str] = None,
    prompt_ids: Optional[List[int]] = None
) -> Tuple[List[int], Optional[JSONResponse]]:
    """Tokenize the prompt (if needed) and validate the context-length budget.

    Exactly one of *prompt* / *prompt_ids* must be provided.

    Side effect: when ``request.max_tokens`` is None it is filled in with the
    remaining budget ``max_model_len - token_num``.

    Returns:
        ``(input_ids, error)`` where ``error`` is None on success, otherwise
        a 400 JSON response describing the context-length overflow.

    Raises:
        ValueError: if zero or both of *prompt* / *prompt_ids* are given.
    """
    # Explicit raise instead of `assert`: asserts are stripped under
    # `python -O`, which would silently disable this validation.
    if (prompt is None) == (prompt_ids is None):
        raise ValueError("Either prompt or prompt_ids should be provided.")
    if prompt_ids is not None:
        input_ids = prompt_ids
    else:
        input_ids = tokenizer(prompt).input_ids
    token_num = len(input_ids)

    if request.max_tokens is None:
        request.max_tokens = max_model_len - token_num
    if token_num + request.max_tokens > max_model_len:
        return input_ids, create_error_response(
            HTTPStatus.BAD_REQUEST,
            f"This model's maximum context length is {max_model_len} tokens. "
            f"However, you requested {request.max_tokens + token_num} tokens "
            f"({token_num} in the messages, "
            f"{request.max_tokens} in the completion). "
            f"Please reduce the length of the messages or completion.",
        )
    return input_ids, None
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
@app.get("/health")
async def health() -> Response:
    """Liveness probe: always answers 200 with an empty body."""
    return Response(status_code=HTTPStatus.OK.value)
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
@app.get("/v1/models")
async def show_available_models():
    """Show available models. Right now we only have one model."""
    card = ModelCard(id=served_model,
                     root=served_model,
                     permission=[ModelPermission()])
    return ModelList(data=[card])
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def create_logprobs(token_ids: List[int],
                    id_logprobs: List[Dict[int, float]],
                    initial_text_offset: int = 0) -> LogProbs:
    """Create OpenAI-style logprobs.

    For each generated token: records the token string, its logprob, the
    running character offset of the token within the text, and the full
    candidate-token logprob map for that position.
    """
    result = LogProbs()
    next_offset = initial_text_offset
    for token_id, candidate_logprobs in zip(token_ids, id_logprobs):
        token = tokenizer.convert_ids_to_tokens(token_id)
        result.tokens.append(token)
        result.token_logprobs.append(candidate_logprobs[token_id])
        result.text_offset.append(next_offset)
        next_offset += len(token)
        result.top_logprobs.append({
            tokenizer.convert_ids_to_tokens(tid): lp
            for tid, lp in candidate_logprobs.items()
        })
    return result
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
@app.post("/v1/chat/completions")
async def create_chat_completion(request: ChatCompletionRequest,
                                 raw_request: Request):
    """Completion API similar to OpenAI's API.

    See https://platform.openai.com/docs/api-reference/chat/create
    for the API specification. This API mimics the OpenAI ChatCompletion API.

    NOTE: Currently we do not support the following features:
        - function_call (Users should implement this by themselves)
        - logit_bias (to be supported by vLLM engine)
    """
    logger.info(f"Received chat completion request: {request}")

    # Reject requests that name a model other than the one being served.
    error_check_ret = await check_model(request)
    if error_check_ret is not None:
        return error_check_ret

    if request.logit_bias is not None and len(request.logit_bias) > 0:
        # TODO: support logit_bias in vLLM engine.
        return create_error_response(HTTPStatus.BAD_REQUEST,
                                     "logit_bias is not currently supported")

    prompt = await get_gen_prompt(request)
    token_ids, error_check_ret = await check_length(request, prompt=prompt)
    if error_check_ret is not None:
        return error_check_ret

    model_name = request.model
    request_id = f"cmpl-{random_uuid()}"
    # The OpenAI `created` field is a Unix timestamp in seconds.
    # time.monotonic() (used previously) counts from an arbitrary origin
    # and produced meaningless `created` values; time.time() is correct.
    created_time = int(time.time())
    try:
        # spaces_between_special_tokens = request.spaces_between_special_tokens
        sampling_params = SamplingParams(
            n=request.n,
            presence_penalty=request.presence_penalty,
            frequency_penalty=request.frequency_penalty,
            temperature=request.temperature,
            top_p=request.top_p,
            stop=request.stop,
            stop_token_ids=request.stop_token_ids,
            max_tokens=request.max_tokens,
            best_of=request.best_of,
            top_k=request.top_k,
            ignore_eos=request.ignore_eos,
            use_beam_search=request.use_beam_search,
            skip_special_tokens=request.skip_special_tokens,
            # spaces_between_special_tokens=spaces_between_special_tokens,
        )
    except ValueError as e:
        # SamplingParams validates its arguments and raises on bad values.
        return create_error_response(HTTPStatus.BAD_REQUEST, str(e))

    result_generator = engine.generate(prompt, sampling_params, request_id,
                                       token_ids)

    def create_stream_response_json(
        index: int,
        text: str,
        finish_reason: Optional[str] = None,
    ) -> str:
        # Serialize one SSE delta chunk in the OpenAI streaming format.
        choice_data = ChatCompletionResponseStreamChoice(
            index=index,
            delta=DeltaMessage(content=text),
            finish_reason=finish_reason,
        )
        response = ChatCompletionStreamResponse(
            id=request_id,
            created=created_time,
            model=model_name,
            choices=[choice_data],
        )
        response_json = response.json(ensure_ascii=False)

        return response_json

    async def completion_stream_generator() -> AsyncGenerator[str, None]:
        # First chunk with role
        for i in range(request.n):
            choice_data = ChatCompletionResponseStreamChoice(
                index=i,
                delta=DeltaMessage(role="assistant"),
                finish_reason=None,
            )
            chunk = ChatCompletionStreamResponse(id=request_id,
                                                 choices=[choice_data],
                                                 model=model_name)
            data = chunk.json(exclude_unset=True, ensure_ascii=False)
            yield f"data: {data}\n\n"

        previous_texts = [""] * request.n
        previous_num_tokens = [0] * request.n
        async for res in result_generator:
            res: RequestOutput
            for output in res.outputs:
                i = output.index
                # The engine yields cumulative text; emit only the new suffix.
                delta_text = output.text[len(previous_texts[i]):]
                previous_texts[i] = output.text
                previous_num_tokens[i] = len(output.token_ids)
                response_json = create_stream_response_json(
                    index=i,
                    text=delta_text,
                )
                yield f"data: {response_json}\n\n"
                if output.finish_reason is not None:
                    response_json = create_stream_response_json(
                        index=i,
                        text="",
                        finish_reason=output.finish_reason,
                    )
                    yield f"data: {response_json}\n\n"
        yield "data: [DONE]\n\n"

    # Streaming response
    if request.stream:
        return StreamingResponse(completion_stream_generator(),
                                 media_type="text/event-stream")

    # Non-streaming response
    final_res: RequestOutput = None
    async for res in result_generator:
        if await raw_request.is_disconnected():
            # Abort the request if the client disconnects.
            await engine.abort(request_id)
            return create_error_response(HTTPStatus.BAD_REQUEST,
                                         "Client disconnected")
        final_res = res
    assert final_res is not None
    choices = []
    for output in final_res.outputs:
        choice_data = ChatCompletionResponseChoice(
            index=output.index,
            message=ChatMessage(role="assistant", content=output.text),
            finish_reason=output.finish_reason,
        )
        choices.append(choice_data)

    num_prompt_tokens = len(final_res.prompt_token_ids)
    num_generated_tokens = sum(
        len(output.token_ids) for output in final_res.outputs)
    usage = UsageInfo(
        prompt_tokens=num_prompt_tokens,
        completion_tokens=num_generated_tokens,
        total_tokens=num_prompt_tokens + num_generated_tokens,
    )
    response = ChatCompletionResponse(
        id=request_id,
        created=created_time,
        model=model_name,
        choices=choices,
        usage=usage,
    )

    if request.stream:
        # When user requests streaming but we don't stream, we still need to
        # return a streaming response with a single event.
        response_json = response.json(ensure_ascii=False)

        async def fake_stream_generator() -> AsyncGenerator[str, None]:
            yield f"data: {response_json}\n\n"
            yield "data: [DONE]\n\n"

        return StreamingResponse(fake_stream_generator(),
                                 media_type="text/event-stream")

    return response
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
@app.post("/v1/completions")
async def create_completion(request: CompletionRequest, raw_request: Request):
    """Completion API similar to OpenAI's API.

    See https://platform.openai.com/docs/api-reference/completions/create
    for the API specification. This API mimics the OpenAI Completion API.

    NOTE: Currently we do not support the following features:
        - echo (since the vLLM engine does not currently support
          getting the logprobs of prompt tokens)
        - suffix (the language models we currently support do not support
          suffix)
        - logit_bias (to be supported by vLLM engine)
    """
    logger.info(f"Received completion request: {request}")

    error_check_ret = await check_model(request)
    if error_check_ret is not None:
        return error_check_ret

    if request.echo:
        # We do not support echo since the vLLM engine does not
        # currently support getting the logprobs of prompt tokens.
        return create_error_response(HTTPStatus.BAD_REQUEST,
                                     "echo is not currently supported")

    if request.suffix is not None:
        # The language models we currently support do not support suffix.
        return create_error_response(HTTPStatus.BAD_REQUEST,
                                     "suffix is not currently supported")

    if request.logit_bias is not None and len(request.logit_bias) > 0:
        # TODO: support logit_bias in vLLM engine.
        return create_error_response(HTTPStatus.BAD_REQUEST,
                                     "logit_bias is not currently supported")

    model_name = request.model
    request_id = f"cmpl-{random_uuid()}"

    # Decide whether request.prompt is raw text or pre-tokenized ids.
    use_token_ids = False
    if isinstance(request.prompt, list):
        if len(request.prompt) == 0:
            return create_error_response(HTTPStatus.BAD_REQUEST,
                                         "please provide at least one prompt")
        first_element = request.prompt[0]
        if isinstance(first_element, int):
            use_token_ids = True
            prompt = request.prompt
        elif isinstance(first_element, (str, list)):
            # TODO: handles multiple prompt case in list[list[int]]
            if len(request.prompt) > 1:
                return create_error_response(
                    HTTPStatus.BAD_REQUEST,
                    "multiple prompts in a batch is not currently supported")
            use_token_ids = not isinstance(first_element, str)
            prompt = request.prompt[0]
        # NOTE(review): a list whose first element is neither int/str/list
        # leaves `prompt` unbound and would raise UnboundLocalError below —
        # presumably unreachable given request validation; confirm upstream.
    else:
        prompt = request.prompt

    if use_token_ids:
        _, error_check_ret = await check_length(request, prompt_ids=prompt)
    else:
        token_ids, error_check_ret = await check_length(request, prompt=prompt)
    if error_check_ret is not None:
        return error_check_ret

    # The OpenAI `created` field is a Unix timestamp in seconds.
    # time.monotonic() (used previously) counts from an arbitrary origin
    # and produced meaningless `created` values; time.time() is correct.
    created_time = int(time.time())
    try:
        # spaces_between_special_tokens = request.spaces_between_special_tokens
        sampling_params = SamplingParams(
            n=request.n,
            best_of=request.best_of,
            presence_penalty=request.presence_penalty,
            frequency_penalty=request.frequency_penalty,
            temperature=request.temperature,
            top_p=request.top_p,
            top_k=request.top_k,
            stop=request.stop,
            stop_token_ids=request.stop_token_ids,
            ignore_eos=request.ignore_eos,
            max_tokens=request.max_tokens,
            logprobs=request.logprobs,
            use_beam_search=request.use_beam_search,
            skip_special_tokens=request.skip_special_tokens,
            # spaces_between_special_tokens=spaces_between_special_tokens,
        )
    except ValueError as e:
        # SamplingParams validates its arguments and raises on bad values.
        return create_error_response(HTTPStatus.BAD_REQUEST, str(e))

    if use_token_ids:
        result_generator = engine.generate(None,
                                           sampling_params,
                                           request_id,
                                           prompt_token_ids=prompt)
    else:
        result_generator = engine.generate(prompt, sampling_params, request_id,
                                           token_ids)

    # Similar to the OpenAI API, when n != best_of, we do not stream the
    # results. In addition, we do not stream the results when use beam search.
    stream = (request.stream
              and (request.best_of is None or request.n == request.best_of)
              and not request.use_beam_search)

    def create_stream_response_json(
        index: int,
        text: str,
        logprobs: Optional[LogProbs] = None,
        finish_reason: Optional[str] = None,
    ) -> str:
        # Serialize one SSE delta chunk in the OpenAI streaming format.
        choice_data = CompletionResponseStreamChoice(
            index=index,
            text=text,
            logprobs=logprobs,
            finish_reason=finish_reason,
        )
        response = CompletionStreamResponse(
            id=request_id,
            created=created_time,
            model=model_name,
            choices=[choice_data],
        )
        response_json = response.json(ensure_ascii=False)

        return response_json

    async def completion_stream_generator() -> AsyncGenerator[str, None]:
        previous_texts = [""] * request.n
        previous_num_tokens = [0] * request.n
        async for res in result_generator:
            res: RequestOutput
            for output in res.outputs:
                i = output.index
                # The engine yields cumulative text; emit only the new suffix.
                delta_text = output.text[len(previous_texts[i]):]
                if request.logprobs is not None:
                    logprobs = create_logprobs(
                        output.token_ids[previous_num_tokens[i]:],
                        output.logprobs[previous_num_tokens[i]:],
                        len(previous_texts[i]))
                else:
                    logprobs = None
                previous_texts[i] = output.text
                previous_num_tokens[i] = len(output.token_ids)
                response_json = create_stream_response_json(
                    index=i,
                    text=delta_text,
                    logprobs=logprobs,
                )
                yield f"data: {response_json}\n\n"
                if output.finish_reason is not None:
                    logprobs = (LogProbs()
                                if request.logprobs is not None else None)
                    response_json = create_stream_response_json(
                        index=i,
                        text="",
                        logprobs=logprobs,
                        finish_reason=output.finish_reason,
                    )
                    yield f"data: {response_json}\n\n"
        yield "data: [DONE]\n\n"

    # Streaming response
    if stream:
        return StreamingResponse(completion_stream_generator(),
                                 media_type="text/event-stream")

    # Non-streaming response
    final_res: RequestOutput = None
    async for res in result_generator:
        if await raw_request.is_disconnected():
            # Abort the request if the client disconnects.
            await engine.abort(request_id)
            return create_error_response(HTTPStatus.BAD_REQUEST,
                                         "Client disconnected")
        final_res = res
    assert final_res is not None
    choices = []
    for output in final_res.outputs:
        if request.logprobs is not None:
            logprobs = create_logprobs(output.token_ids, output.logprobs)
        else:
            logprobs = None
        choice_data = CompletionResponseChoice(
            index=output.index,
            text=output.text,
            logprobs=logprobs,
            finish_reason=output.finish_reason,
        )
        choices.append(choice_data)

    num_prompt_tokens = len(final_res.prompt_token_ids)
    num_generated_tokens = sum(
        len(output.token_ids) for output in final_res.outputs)
    usage = UsageInfo(
        prompt_tokens=num_prompt_tokens,
        completion_tokens=num_generated_tokens,
        total_tokens=num_prompt_tokens + num_generated_tokens,
    )
    response = CompletionResponse(
        id=request_id,
        created=created_time,
        model=model_name,
        choices=choices,
        usage=usage,
    )

    if request.stream:
        # When user requests streaming but we don't stream, we still need to
        # return a streaming response with a single event.
        response_json = response.json(ensure_ascii=False)

        async def fake_stream_generator() -> AsyncGenerator[str, None]:
            yield f"data: {response_json}\n\n"
            yield "data: [DONE]\n\n"

        return StreamingResponse(fake_stream_generator(),
                                 media_type="text/event-stream")

    return response
|
| 575 |
+
|
| 576 |
+
|
| 577 |
+
if __name__ == "__main__":
    # CLI entry point: parse server + engine args, wire CORS, build the
    # async vLLM engine and tokenizer, then serve the FastAPI app.
    parser = argparse.ArgumentParser(
        description="vLLM OpenAI-Compatible RESTful API server.")
    parser.add_argument("--host", type=str, default=None, help="host name")
    parser.add_argument("--port", type=int, default=8000, help="port number")
    parser.add_argument("--allow-credentials", action="store_true",
                        help="allow credentials")
    parser.add_argument("--allowed-origins", type=json.loads, default=["*"],
                        help="allowed origins")
    parser.add_argument("--allowed-methods", type=json.loads, default=["*"],
                        help="allowed methods")
    parser.add_argument("--allowed-headers", type=json.loads, default=["*"],
                        help="allowed headers")
    parser.add_argument("--served-model-name", type=str, default=None,
                        help="The model name used in the API. If not "
                             "specified, the model name will be the same as "
                             "the huggingface name.")

    # Let vLLM append its own engine flags (model, tokenizer, ...) to the parser.
    parser = AsyncEngineArgs.add_cli_args(parser)
    args = parser.parse_args()

    app.add_middleware(
        CORSMiddleware,
        allow_origins=args.allowed_origins,
        allow_credentials=args.allow_credentials,
        allow_methods=args.allowed_methods,
        allow_headers=args.allowed_headers,
    )

    logger.info(f"args: {args}")

    # Model name advertised by the API defaults to the huggingface model id.
    served_model = args.served_model_name if args.served_model_name is not None else args.model

    engine_args = AsyncEngineArgs.from_cli_args(args)
    engine = AsyncLLMEngine.from_engine_args(engine_args)
    engine_model_config = asyncio.run(engine.get_model_config())
    max_model_len = engine_model_config.max_model_len

    # A separate tokenizer to map token IDs to strings.
    tokenizer = get_tokenizer(engine_args.tokenizer,
                              tokenizer_mode=engine_args.tokenizer_mode,
                              trust_remote_code=engine_args.trust_remote_code)

    uvicorn.run(app,
                host=args.host,
                port=args.port,
                log_level="info",
                timeout_keep_alive=TIMEOUT_KEEP_ALIVE)
|
configs/agent_configs/react_agent_azureopenai_gpt_35_turbo_async.yaml
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ReAct Agent Template
# Azure OpenAI GPT-3.5-turbo ReAct agent with a local python sandbox tool.
name: react_template
version: 0.0.1
type: react
description: A react agent capable of code interpreter
module_name: infiagent.agent.react
class_name: AsyncReactAgent
target_tasks:
  - code interpreter
llm:
  model_name: gpt-35-turbo
  # Fixed garbled module path ("in f i a gen r.llm") — all sibling configs use infiagent.llm.
  module_name: infiagent.llm
  class_name: AzureOpenAIGPTClient
  params:
    temperature: 0.2
    top_p: 0.95
    repetition_penalty: 1.0
    max_tokens: 4096
prompt_template: !prompt ZeroShotReactPrompt
plugins:
  - name: python_code_sandbox
    type: tool
    config: configs/tool_configs/async_python_code_sandbox.yaml
|
configs/agent_configs/react_agent_azureopenai_gpt_4_async.yaml
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ReAct Agent Template
# Azure OpenAI GPT-4 (0613) ReAct agent with a local python sandbox tool.
name: gpt_4_react
version: 0.0.1
type: react
description: A react agent capable of code interpreter
module_name: infiagent.agent.react
class_name: AsyncReactAgent
target_tasks:
  - code interpreter
llm:
  model_name: gpt-4-0613
  module_name: infiagent.llm
  class_name: AzureOpenAIGPTClient
  params:
    temperature: 0.2
    top_p: 0.95
    repetition_penalty: 1.0
    max_tokens: 4096
prompt_template: !prompt ZeroShotReactPrompt
plugins:
  - name: python_code_sandbox
    type: tool
    config: configs/tool_configs/async_python_code_sandbox.yaml
|
configs/agent_configs/react_agent_azureopenai_gpt_4_async_dcoker.yaml
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ReAct Agent Template
# Azure OpenAI GPT-4 ReAct agent; identical to the non-docker config except
# the sandbox tool config points at the docker-based sandbox.
# NOTE(review): the file name ("dcoker") looks like a typo for "docker".
name: gpt_4_react
version: 0.0.1
type: react
description: A react agent capable of code interpreter
module_name: infiagent.agent.react
class_name: AsyncReactAgent
target_tasks:
  - code interpreter
llm:
  model_name: gpt-4-0613
  module_name: infiagent.llm
  class_name: AzureOpenAIGPTClient
  params:
    temperature: 0.2
    top_p: 0.95
    repetition_penalty: 1.0
    max_tokens: 4096
prompt_template: !prompt ZeroShotReactPrompt
plugins:
  - name: python_code_sandbox
    type: tool
    config: configs/tool_configs/async_python_code_sandbox_docker.yaml
|
configs/agent_configs/react_agent_gpt4_async.yaml
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ReAct Agent Template
# OpenAI (non-Azure) GPT-4 ReAct agent with a local python sandbox tool.
name: react_template
version: 0.0.1
type: react
description: A react agent capable of code interpreter
module_name: infiagent.agent.react
class_name: AsyncReactAgent
target_tasks:
  - code interpreter
llm:
  model_name: gpt-4
  module_name: infiagent.llm
  class_name: OpenAIGPTClient
  params:
    temperature: 0.0
    top_p: 0.9
    repetition_penalty: 1.0
    max_tokens: 1024
prompt_template: !prompt ZeroShotReactPrompt
plugins:
  - name: python_code_sandbox
    type: tool
    config: configs/tool_configs/async_python_code_sandbox.yaml
|
configs/agent_configs/react_agent_llama_async.yaml
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ReAct Agent Template
# Llama-2-7b ReAct agent (served through an OpenAI-compatible endpoint,
# see LlamaOpenAIClient) with a local python sandbox tool.
name: react_template
version: 0.0.1
type: react
description: A react agent capable of code interpreter
module_name: infiagent.agent.react
class_name: AsyncReactAgent
target_tasks:
  - code interpreter
llm:
  model_name: meta-llama/Llama-2-7b-hf
  module_name: infiagent.llm
  class_name: LlamaOpenAIClient
  params:
    temperature: 0.0
    top_p: 0.9
    repetition_penalty: 1.0
    max_tokens: 1024
prompt_template: !prompt ZeroShotReactPrompt
plugins:
  - name: python_code_sandbox
    type: tool
    config: configs/tool_configs/async_python_code_sandbox.yaml
|
configs/agent_configs/react_agent_opt_async.yaml
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ReAct Agent Template
# facebook/opt-125m ReAct agent (OpenAI-compatible endpoint, see
# OptOpenAIClient) with a local python sandbox tool.
name: react_template
version: 0.0.1
type: react
description: A react agent capable of code interpreter
module_name: infiagent.agent.react
class_name: AsyncReactAgent
target_tasks:
  - code interpreter
llm:
  model_name: facebook/opt-125m
  module_name: infiagent.llm
  class_name: OptOpenAIClient
  params:
    temperature: 0.0
    top_p: 0.9
    repetition_penalty: 1.0
    max_tokens: 1024
prompt_template: !prompt ZeroShotReactPrompt
plugins:
  - name: python_code_sandbox
    type: tool
    config: configs/tool_configs/async_python_code_sandbox.yaml
|
configs/tool_configs/async_python_code_sandbox.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Local (in-process) python sandbox tool used by the ReAct agents.
name: python_code_sandbox
version: 0.0.1
type: tool
description: this tool can help to run python script with python code as input
module_name: infiagent.tools
class_name: AsyncPythonSandBoxTool
# Placeholder; a real session id is assigned at runtime by the agent session.
session_id: none
|
configs/tool_configs/async_python_code_sandbox_docker.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Docker-backed python sandbox tool variant (class CodeTool instead of the
# in-process AsyncPythonSandBoxTool).
name: python_code_sandbox
version: 0.0.1
type: tool
description: this tool can help to run python script with python code as input
module_name: infiagent.tools
class_name: CodeTool
# Placeholder; a real session id is assigned at runtime by the agent session.
session_id: none
|
run.sh
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Launch the production HTTP API (activities/api.py FastAPI app) under
# uvicorn via poetry. PORT defaults to 3000.
set -ex
poetry run python3 -m uvicorn src.activities.api:app --reload --host 0.0.0.0 --port ${PORT:-3000} --limit-max-requests 5000 --timeout-keep-alive 1200
|
run_demo.sh
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# set -ex
|
| 3 |
+
|
| 4 |
+
streamlit run ./activities/local_demo.py --server.port 6006 -- $@
|
| 5 |
+
|
run_local.sh
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Launch the local test app (activities/local_test.py) under uvicorn via
# poetry. PORT defaults to 3000.
set -ex
poetry run python3 -m uvicorn src.activities.local_test:local_app --reload --host 0.0.0.0 --port ${PORT:-3000} --limit-max-requests 5000 --timeout-keep-alive 1200
|
| 4 |
+
|
setup.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from setuptools import setup, find_packages

# Packaging metadata for the InfiAgent distribution (src/ layout).
setup(
    name='infiagent',
    version='0.1.0',
    author='InfiAgent',
    packages=find_packages(where='src'),
    package_dir={'': 'src'},
    url='https://github.com/InfiAgent/ADA-Agent',
    license='LICENSE.txt',
    description='An awesome package for InfiAgent.',
    # Explicit encoding so installation does not depend on the platform's
    # default locale (fixes potential UnicodeDecodeError on non-UTF-8 systems).
    long_description=open('README.md', encoding='utf-8').read(),
    # README.md is markdown; declare it so PyPI renders it correctly.
    long_description_content_type='text/markdown',
    # Ship the bundled YAML configs with the package.
    package_data={
        'infiagent.configs.agent_configs': ['*.yaml'],
        'infiagent.configs.tool_configs': ['*.yaml'],
    },
    install_requires=[
        "streamlit",
        "pyyaml",
        "pytest",
        "openai==0.27.7",
        "fastapi",
        "uvicorn",
        "uvloop",
        "watchdog",
        "chardet",
        "werkzeug",
        "python-dotenv",
        "motor",
        "aiofiles",
        "sse_starlette",
        "loguru",
        "jupyter_client",
        "pandas",
        "scikit-learn",
        "scipy",
        "ipykernel"
    ],
    python_requires='>=3.9'
)
|
src/infiagent/__init__.py
ADDED
|
File without changes
|
src/infiagent/agent/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .base_agent import BaseAgent
|
| 2 |
+
from .react import AsyncReactAgent
|
src/infiagent/agent/base_agent.py
ADDED
|
@@ -0,0 +1,337 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
from abc import ABC, abstractmethod
|
| 3 |
+
from typing import Dict, Callable, Union, AsyncGenerator
|
| 4 |
+
|
| 5 |
+
from ..exceptions.exceptions import InputErrorException
|
| 6 |
+
from ..prompt import PromptTemplate
|
| 7 |
+
from ..schemas import AgentOutput, AgentType, AgentResponse
|
| 8 |
+
|
| 9 |
+
from ..llm.base_llm import BaseLLM
|
| 10 |
+
|
| 11 |
+
from ..tools import BaseTool
|
| 12 |
+
from ..utils import Config, get_logger
|
| 13 |
+
|
| 14 |
+
import os
|
| 15 |
+
from importlib import import_module
|
| 16 |
+
|
| 17 |
+
logger = get_logger()
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# LLM params that callers may override via kwargs at construction time.
LLM_CONF_OVERRIDE_KEY = ['psm', 'dc', 'temperature', 'top_p', 'top_k', 'max_tokens']


class BaseAgent(ABC):
    """Base Agent class defining the essential attributes and methods for an ALM Agent."""

    def __init__(self, **kwargs):
        """Initialize the agent from keyword configuration.

        Recognized keys: ``name``, ``type``, ``version``, ``description``,
        ``prompt_template`` (dict or PromptTemplate), ``auth`` (mapping that is
        exported to environment variables, typically LLM credentials).
        """
        # Set default values, then overlay whatever the caller provided.
        default_config = {
            'name': 'agent',
            'type': AgentType.react,
            'version': '',
            'description': '',
            'prompt_template': None,
            'auth': {}
        }
        default_config.update(kwargs)

        # Export auth entries as environment variables first so later init
        # steps (e.g. LLM clients) can read credentials from the environment.
        auth = default_config['auth']
        self._set_auth_env(auth)

        self._name: str = default_config['name']
        self._type: AgentType = default_config['type']
        self._version: str = default_config['version']
        self._description: str = default_config['description']
        # NOTE(review): _get_prompt_template asserts dict/PromptTemplate, so a
        # missing 'prompt_template' (None default) fails here — callers are
        # expected to always supply one.
        self.__prompt_template: Union[PromptTemplate, None] = \
            self._get_prompt_template(default_config['prompt_template'])
        self.__llm: Union[BaseLLM, None] = None
        self.__plugins_map: Dict = {}
        # Lazily-built caches (see _get_plugin_function_map / _get_plugin_description).
        self.__plugin_tool_function = {}
        self.__plugin_tool_async_function = {}
        self.__plugin_tool_description = None

    @property
    def name(self) -> str:
        return self._name

    @property
    def type(self) -> AgentType:
        return self._type

    @property
    def version(self) -> str:
        return self._version

    @property
    def description(self) -> str:
        return self._description

    @property
    def prompt_template(self) -> PromptTemplate:
        return self.__prompt_template

    @property
    def llm(self) -> Union[BaseLLM, None]:
        return self.__llm

    @llm.setter
    def llm(self, llm_client: BaseLLM):
        if llm_client is None or not isinstance(llm_client, BaseLLM):
            raise InputErrorException("Invalid llm client {}".format(type(llm_client)))
        self.__llm = llm_client

    @property
    def plugins_map(self) -> Dict:
        return self.__plugins_map.copy()  # Return a copy to prevent external modification

    def add_plugin(self, tool_name: str, tool):
        """Register a plugin (tool or sub-agent) under ``tool_name``."""
        if not tool_name or not tool:
            raise InputErrorException("Adding invalid tool name: {}, type {}".format(tool_name, type(tool)))
        self.__plugins_map[tool_name] = tool

    def _set_auth_env(self, obj):
        """This method sets environment variables for authentication.
        """
        for key in obj:
            os.environ[key] = obj.get(key)

    def _get_prompt_template(self, obj):
        """Return prompt template instance(s) for the provided configuration.

        Accepts a single template (dict config or PromptTemplate) or a dict
        mapping names to template configs; returns the parsed equivalent.
        """
        assert isinstance(obj, dict) or isinstance(obj, PromptTemplate)
        if isinstance(obj, dict):
            return {
                key: self._parse_prompt_template(obj[key]) for key in obj
            }
        elif isinstance(obj, PromptTemplate):
            ans = self._parse_prompt_template(obj)
            return ans
        else:
            raise InputErrorException("Invalid PromptTemplate, it should be a dict or PromptTemplate. But get {}"
                                      .format(type(obj)))

    def _parse_prompt_template(self, obj: Union[dict, PromptTemplate]):
        """Parse one prompt template config (dict) into a PromptTemplate instance;
        pass an existing PromptTemplate through unchanged.
        """
        assert isinstance(obj, dict) or isinstance(obj, PromptTemplate)
        if isinstance(obj, PromptTemplate):
            return obj
        return PromptTemplate(input_variables=obj['input_variables'],
                              template=obj['template'],
                              validate_template=bool(obj.get('validate_template', True)))

    @classmethod
    def _get_basic_instance_from_config(cls, config_data):
        """Dynamically import and instantiate the agent class named in the config."""
        agent_module_name = config_data.get("module_name", None)
        agent_class_name = config_data.get("class_name", None)
        if not agent_module_name or not agent_class_name:
            raise InputErrorException("Agent module_name and class_name required, please check your config")

        module = import_module(agent_module_name)
        clazz = getattr(module, agent_class_name)
        agent_instance = clazz(**config_data)
        return agent_instance

    @classmethod
    def from_config_path_and_kwargs(cls, config_path, **kwargs):
        """Build an agent (with its LLM and plugins) synchronously from a config file."""
        config_data = Config.load(config_path)
        logger.info(f"Use config from path {config_path} to init agent : {config_data}")
        agent_instance = cls._get_basic_instance_from_config(config_data)

        # Override default LLM params with caller-supplied (truthy) values.
        if 'llm' in config_data and 'params' in config_data['llm']:
            for param in LLM_CONF_OVERRIDE_KEY:
                if param in kwargs and kwargs[param]:
                    logger.info(f"Overwrite with new {param} {kwargs[param]}")
                    config_data['llm']['params'][param] = kwargs[param]

        assert isinstance(agent_instance, BaseAgent)
        agent_instance._init_llm(config_data.get("llm", {}))
        agent_instance._init_plugins(config_data.get('plugins', []))
        return agent_instance

    def _init_llm(self, obj):
        """
        This method parses the Language Model Manager (LLM) configuration and returns an LLM instance.

        :param obj: A configuration dictionary or string.
        :type obj: dict or str
        :raises ValueError: If the specified LLM is not supported.
        :return: An LLM instance.
        :rtype: BaseLLM
        """
        if isinstance(obj, str):
            name = obj
            model_params = dict()
        else:
            name = obj.get('model_name', None)
            model_params = obj.get('params', dict())

        # NOTE(review): when obj is a plain string these subscripts raise
        # TypeError — the str branch above appears to be dead/unsupported.
        module_name = obj['module_name']
        class_name = obj['class_name']

        module = import_module(module_name)
        clazz = getattr(module, class_name)

        llm = clazz(model_name=name, params=model_params)
        self.llm = llm

    def _init_plugins(self, configs):
        """
        Parse the plugin configuration and register each plugin.

        Fix: the original assigned into ``self.plugins_map[...]``, but that
        property returns a defensive *copy*, so every plugin registration was
        silently discarded. Registration now goes through ``add_plugin``,
        which mutates the real internal map.
        """
        assert isinstance(configs, list)
        for plugin_config in configs:
            if plugin_config.get('type', "") == 'agent':
                # Agent as plugin
                agent = BaseAgent.from_config_path_and_kwargs(plugin_config['config'])
                self.add_plugin(plugin_config['name'], agent)
            else:
                # Tools as plugin
                params = plugin_config.get('params', dict())
                tool = BaseTool.from_config(config_input=plugin_config['config'], **params)
                self.add_plugin(tool.name, tool)

    @classmethod
    async def async_from_config_path_and_kwargs(cls, config_path, **kwargs):
        """Build an agent asynchronously: the LLM and all plugins are initialized concurrently."""
        config_data = Config.load(config_path)
        logger.info(f"Use config from path {config_path} to init agent : {config_data}")
        agent_instance = cls._get_basic_instance_from_config(config_data)

        # override default config with user input
        if 'llm' in config_data and 'params' in config_data['llm']:
            for param in LLM_CONF_OVERRIDE_KEY:
                if param in kwargs and kwargs[param]:
                    logger.info(f"Overwrite with new {param} {kwargs[param]}")
                    config_data['llm']['params'][param] = kwargs[param]

        llm_config = config_data.get("llm", {})
        plugin_configs = config_data.get('plugins', [])

        # Create tasks for llm and each individual plugin, then gather results.
        llm_task = asyncio.create_task(cls._async_init_llm(llm_config))
        plugin_tasks = [asyncio.create_task(cls._async_init_plugin(plugin_config)) for
                        plugin_config in plugin_configs]

        llm, *plugins = await asyncio.gather(llm_task, *plugin_tasks)

        agent_instance.llm = llm
        for plugin in plugins:
            plugin_name, plugin_instance = plugin
            agent_instance.add_plugin(plugin_name, plugin_instance)
        return agent_instance

    @classmethod
    async def _async_init_llm(cls, llm_config):
        """Import the configured LLM class and await its async factory."""
        llm_model_name = llm_config.get("module_name", None)
        llm_class_name = llm_config.get("class_name", None)
        if not llm_model_name or not llm_class_name:
            raise InputErrorException("Agent LLM module_name and class_name required, please check your config")
        module = import_module(llm_model_name)
        clazz = getattr(module, llm_class_name)
        assert issubclass(clazz, BaseLLM), f"{clazz} is not a subclass of BaseLLM"
        llm_instance = await clazz.create(config_data=llm_config)
        return llm_instance

    @classmethod
    async def _async_init_plugin(cls, plugin_config):
        """Initialize a single plugin (sub-agent or tool); return (name, instance)."""
        if plugin_config.get('type', "") == 'agent':
            # Agent as plugin
            agent = await BaseAgent.async_from_config_path_and_kwargs(plugin_config['config'])
            return plugin_config['name'], agent
        else:
            # Tool as plugin
            params = plugin_config.get('params', dict())
            name = plugin_config.get('name', None)
            config = plugin_config['config']

            tool = await BaseTool.async_from_config(config_input=config, **params)

            # Fall back to the tool's self-declared name when none is configured.
            if name is None:
                name = tool.name
            logger.info("Init tool with name [{}], and description [{}]".format(name, tool.description))
            return name, tool

    @abstractmethod
    def run(self, *args, **kwargs) -> Union[AgentResponse, None]:
        """Abstract method to be overridden by child classes for running the agent.

        (Fix: annotation was the list literal ``[AgentResponse, None]``;
        ``Union[AgentResponse, None]`` is the valid spelling.)

        :return: The output of the agent.
        :rtype: AgentOutput
        """
        pass

    async def async_run(self, *args, **kwargs) -> AsyncGenerator[AgentResponse, None]:
        """Default async wrapper: yield the synchronous run() result once.

        :return: The output of the agent.
        """
        yield self.run(*args, **kwargs)

    def _get_plugin_function_map(self, method_name: str) -> Dict[str, Callable]:
        """Build (and cache) a name -> bound-method map for all plugins.

        ``method_name`` is "run" or "async_run"; each is cached separately.
        """
        if method_name == "run" and self.__plugin_tool_function:
            return self.__plugin_tool_function
        elif method_name == "async_run" and self.__plugin_tool_async_function:
            return self.__plugin_tool_async_function

        function_map = {}

        for name, plugin_tool in self.plugins_map.items():
            if isinstance(plugin_tool, (BaseTool, BaseAgent)):
                function_map[name] = getattr(plugin_tool, method_name)
            else:
                logger.warning(f"No support for plugin name {name} of type {type(plugin_tool)}")

        if method_name == "run":
            self.__plugin_tool_function = function_map
        elif method_name == "async_run":
            self.__plugin_tool_async_function = function_map

        return function_map

    def get_plugin_tool_function(self) -> Dict[str, Callable]:
        """Format the function map for the function API.

        :return: The function map.
        :rtype: Dict[str, Callable]
        """
        return self._get_plugin_function_map("run")

    def get_plugin_tool_async_function(self) -> Dict[str, Callable]:
        """Format the function map for the function API.

        :return: The function map.
        :rtype: Dict[str, Callable]
        """
        return self._get_plugin_function_map("async_run")

    def _get_plugin_description(self):
        """Build (and cache) a newline-joined "name[input]: description" listing of all plugins."""
        if self.__plugin_tool_description:
            return self.__plugin_tool_description

        descriptions = ""
        try:
            for plugin_name, plugin in self.plugins_map.items():
                descriptions += f"{plugin_name}[input]: {plugin.description}\n"
        except Exception as e:
            err_msg = "Failed to get plugin tool name and description. error: {}".format(str(e))
            raise InputErrorException(err_msg) from e

        self.__plugin_tool_description = descriptions
        return descriptions

    def clear(self):
        """
        Clear and reset the agent.
        """
        pass
|
src/infiagent/agent/react/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .async_react_agent import AsyncReactAgent
|
| 2 |
+
__all__ = [
|
| 3 |
+
'AsyncReactAgent'
|
| 4 |
+
]
|
src/infiagent/agent/react/async_react_agent.py
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import time
|
| 3 |
+
from typing import Union, List, Dict
|
| 4 |
+
|
| 5 |
+
from werkzeug.datastructures import FileStorage
|
| 6 |
+
|
| 7 |
+
from .. import BaseAgent
|
| 8 |
+
from ...exceptions.exceptions import InternalErrorException, LLMException, SandboxException
|
| 9 |
+
from ...schemas import (
|
| 10 |
+
AgentType, AgentRequest, AgentFinish, AgentAction, AgentResponse,
|
| 11 |
+
BaseAgentResponse, AgentObservation, RunCodeOutput, MediaFile
|
| 12 |
+
)
|
| 13 |
+
from ...tools import PythonSandBoxToolResponse, AsyncPythonSandBoxTool
|
| 14 |
+
from ...utils import get_logger, replace_latex_format, extract_and_replace_url, \
|
| 15 |
+
OBSERVATION_PREFIX_CN, OBSERVATION_PREFIX_EN, AGENT_FAILED_CN, AGENT_FAILED_EN, \
|
| 16 |
+
TOOL_INPUT_PREFIX_CN, TOOL_INPUT_PREFIX_EN
|
| 17 |
+
|
| 18 |
+
# Fix: this entire constant block (and the logger) was defined twice in a
# row with identical values; keep a single definition of each.
# Name of the sandbox plugin the agent looks up in its plugins map.
SAND_BOX_PLUGIN_NAME = 'python_code_sandbox'
# Markers in LLM output that indicate the agent produced a final answer.
FINAL_ANSWER_INDICATORS = ["Final Answer:", "[END]", "The final Answer", "final answer"]
# Markdown fences used to extract python code blocks from LLM output.
CODE_BLOCK_START_TAG = '```python'
CODE_BLOCK_TAG = '```'
# Stop sequence for generation so the model halts before inventing observations.
STOP_WORD = ['Observation:']

logger = get_logger()
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class AsyncReactAgent(BaseAgent):
    """ReAct-style agent that alternates LLM "thoughts" with python-sandbox
    tool executions until a final answer is produced.

    The agent is asynchronous end to end: ``async_run`` yields one
    ``AgentResponse`` per intermediate step (LLM thought or sandbox
    observation) so callers can stream progress to the user.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._name = self._name or "AsyncReactAgent"
        self._type = AgentType.react
        # Alternating AgentAction / AgentObservation steps; used to rebuild
        # the scratchpad section of the prompt on every round.
        self.__intermediate_steps: List[BaseAgentResponse] = []

    @property
    def intermediate_steps(self):
        return self.__intermediate_steps

    def run(self, *args, **kwargs):
        # Synchronous execution is intentionally unsupported; use async_run.
        pass

    async def sync_to_sandbox(self, file: Union[str, Dict, FileStorage]):
        """Upload a local file into the sandbox and return its sandbox path."""
        sandbox_plugin = self.plugins_map.get(SAND_BOX_PLUGIN_NAME)
        # FIX: the original isinstance check used the tuple
        # (AsyncPythonSandBoxTool, AsyncPythonSandBoxTool) — a duplicated element.
        if not isinstance(sandbox_plugin, AsyncPythonSandBoxTool):
            raise InternalErrorException("SandBox client is not ready for agent, please check init logic.")
        return await sandbox_plugin.sync_to_sandbox(file)

    async def async_run(self, agent_req: AgentRequest):
        """Run the full ReAct loop for a request, yielding each step's response."""
        instruction = '\n'.join(message.content for message in agent_req.messages)
        async for response in self._chat(instruction, is_cn=agent_req.is_cn):
            yield response

    async def _chat(self, instruction: str, is_cn=False, max_iterations=10,
                    max_single_step_iterations=3):
        """Drive up to ``max_iterations`` thought/action rounds.

        Each round first asks the LLM for a thought (retried up to
        ``max_single_step_iterations`` times), yields it, and — unless it is a
        final answer — executes the proposed action and yields the observation.
        """
        current_iteration = 0

        for _ in range(max_iterations):
            current_iteration += 1
            llm_response = await self._single_round_thought(instruction,
                                                           max_llm_iteration=max_single_step_iterations,
                                                           is_cn=is_cn)
            logger.info("Round {} of {}, [LLM raw output]:\n{}\n\n[Formatted output]:\n{}\n"
                        .format(current_iteration, max_iterations, llm_response.raw_output,
                                llm_response.formatted_output))
            yield self.create_agent_response(llm_response.formatted_output, [], llm_response.raw_output)

            if isinstance(llm_response, AgentFinish):
                logger.info("Find final answer, stop iteration.")
                break

            self.intermediate_steps.append(llm_response)
            action_response, cur_output_files = await self._process_agent_action(llm_response, current_iteration,
                                                                                 max_iterations, is_cn)
            logger.info("Round {} of {}, [Plugin raw output]:\n{}\n[Formatted output]:\n{}\n"
                        .format(current_iteration, max_iterations, action_response.raw_output,
                                action_response.formatted_output))
            self.intermediate_steps.append(action_response)

            yield self.create_agent_response(action_response.formatted_output,
                                             cur_output_files,
                                             action_response.raw_output)

        logger.info(f"Finished iteration in {current_iteration}.")

    # TODO update logic to not be sandbox specific, sandbox related logic should be handled in sandbox client
    async def _process_agent_action(self, response, current_iteration, max_iterations, is_cn: bool = False):
        """Execute the action proposed by the LLM in the sandbox.

        Returns an ``(AgentObservation, output_files)`` pair; raises
        SandboxException when the tool invocation fails.
        """
        try:
            # TODO: we hard-wire the tool because the sandbox is currently the
            # only plugin; reuse the module constant instead of a new literal.
            response.tool = SAND_BOX_PLUGIN_NAME
            action_response = await self.get_plugin_tool_async_function()[response.tool](response.tool_input)
            logger.info(
                f"Step {current_iteration} of {max_iterations}. Got agent observation raw output:\n"
                f"{action_response.output_text}")

            if "STDERR" in action_response.output_text:
                # Error output can be huge; trim it to keep the prompt small.
                formatted_output = self._process_sandbox_output(action_response.output_text)
            else:
                formatted_output = action_response.output_text

            formatted_output = replace_latex_format(formatted_output)
            observation_prefix = OBSERVATION_PREFIX_CN if is_cn else OBSERVATION_PREFIX_EN
            formatted_output = f"{observation_prefix}\n{formatted_output}\n"

            action_observation = AgentObservation(tool=response.tool,
                                                 formatted_output=formatted_output,
                                                 raw_output=action_response.output_text)
            cur_output_files = self._get_output_files(action_response)
            return action_observation, cur_output_files

        except Exception as e:
            logger.error(f"Error occurred while executing tool {response.tool} with input {response.tool_input}. "
                         f"Error: {str(e)}", exc_info=True)
            # TODO: We hard code here as we only have one tool
            raise SandboxException("Error occurred while running the tool") from e

    def _compose_prompt(self, instruction) -> str:
        """
        Compose the prompt from template, worker description, examples and instruction.
        """
        # FIX: guard against a missing template BEFORE using it — the original
        # called construct_scratchpad() first, making the None check dead code.
        if self.prompt_template is None:
            raise InternalErrorException("Agent prompt is none, please check init process")

        agent_scratchpad = self.prompt_template.construct_scratchpad(self.__intermediate_steps)
        tool_description = self._get_plugin_description()
        tool_names = ", ".join(list(self.plugins_map.keys()))

        return self.prompt_template.format(
            instruction=instruction,
            agent_scratchpad=agent_scratchpad,
            tool_description=tool_description,
            tool_names=tool_names
        )

    async def _single_round_thought(self, instruction: str, max_llm_iteration=3, is_cn: bool = False) -> \
            Union[AgentAction, AgentFinish]:
        """Ask the LLM for one thought, retrying on LLM/parse failures.

        Returns the parsed AgentAction/AgentFinish, or a failure AgentFinish
        after exhausting the retry budget.
        """
        llm_iteration_count = 0

        llm_response = None
        while llm_iteration_count <= max_llm_iteration:
            llm_iteration_count += 1
            try:
                llm_response = await self._get_llm_response(instruction)
                action_response = self._parse_output(llm_response.content, is_cn)

                return action_response
            except Exception as e:
                logger.error("LLM iteration {} out of {} failed. Error: {}".
                             format(llm_iteration_count, max_llm_iteration, str(e)), exc_info=True)

        if llm_iteration_count > max_llm_iteration:
            logger.error("LLM iteration {} exceed max retry {}. Aborting".
                         format(llm_iteration_count, max_llm_iteration))
            return AgentFinish(formatted_output=AGENT_FAILED_CN if is_cn else AGENT_FAILED_EN,
                               raw_output=str(llm_response))

    async def _get_llm_response(self, instruction: str):
        """Send the composed prompt to the LLM; raise LLMException on error state."""
        prompt = self._compose_prompt(instruction)
        logger.info("Send prompt to LLM:\n{}".format(prompt))
        response = await self.llm.async_completion(prompt)
        if response.state == "error":
            raise LLMException("Failed to retrieve response from LLM, error: {}".format(str(response.content)))

        logger.info("Got response from llm, raw response content: \n{}".format(response.content))
        return response

    def _parse_output(self, llm_output: str, is_cn: bool = False) -> Union[AgentAction, AgentFinish]:
        """Parse raw LLM text into an AgentAction or AgentFinish.

        Raises LLMException when the output matches neither a final answer
        nor a recognizable Action / Action Input structure.
        """
        # Truncate anything the model hallucinated after a stop word.
        for stop_word in STOP_WORD:
            if stop_word in llm_output:
                llm_output = llm_output.split(stop_word)[0].rstrip()
                break

        # Check for Final Answer, if it is final, then just return
        for indicator in FINAL_ANSWER_INDICATORS:
            if indicator in llm_output:
                # got final answer and remove the indicator
                parts = llm_output.split(indicator)
                # formatted_output = ''.join(parts[:-1]).strip()
                formatted_output = ''.join(parts).strip()
                formatted_output = replace_latex_format(formatted_output)
                return AgentFinish(raw_output=llm_output, formatted_output=formatted_output)

        # Two alternatives: "Action: ... Action Input: ```python ...```" form
        # (groups 1-4) or a bare '''lang\ncode''' form (groups 5-8).
        ACTION_REGEX_1 = r"(.*?)\n?Action:\s*(.*?)\n?Action\s*Input:\s*```python\n(.*?)```(.*?)$|(.*?)\n?'''(\w+)\n?(.*?)\n?'''(.*?)$"
        ACTION_REGEX_2 = r"(.*?)\n?Action:\s*(.*?)\n?Action\s*Input:\s*```py\n(.*?)```(.*?)$|(.*?)\n?'''(\w+)\n?(.*?)\n?'''(.*?)$"

        action_match = re.search(ACTION_REGEX_1, llm_output, re.DOTALL) or re.search(ACTION_REGEX_2, llm_output, re.DOTALL)

        # Find action, context, and action input, build action response
        if action_match:
            # FIX: only one alternative's groups are populated; the original
            # unconditionally called .strip() on groups 1-3 and raised
            # AttributeError whenever the second alternative matched.
            if action_match.group(1) is not None:
                context = action_match.group(1).strip()
                action_tool_description = action_match.group(2).strip()
                action_input = action_match.group(3).strip()
            else:
                context = (action_match.group(5) or "").strip()
                action_tool_description = (action_match.group(6) or "").strip()
                action_input = (action_match.group(7) or "").strip()

            # Format code
            # TODO: currently we only have one plugin which is sandbox, update to support multiple tools
            format_code_block = self._format_code_block(action_input)

            prefix = TOOL_INPUT_PREFIX_CN if is_cn else TOOL_INPUT_PREFIX_EN
            formatted_output = "{}\n{}\n{}\n".format(context, prefix, format_code_block)
            formatted_output = replace_latex_format(formatted_output)

            return AgentAction(tool=action_tool_description,
                               tool_input=format_code_block,
                               formatted_output=formatted_output,
                               raw_output=llm_output)

        # Not final answer and not action, raise exception
        if not re.search(r"Action\s*:", llm_output, re.DOTALL):
            raise LLMException(f"Missing 'Action' in LLM output: `{llm_output}`")
        elif not re.search(r"Action\s*Input\s*:", llm_output, re.DOTALL):
            raise LLMException(f"Missing 'Action Input' in LLM output: `{llm_output}`")
        else:
            raise LLMException(f"Unrecognized LLM output format: `{llm_output}`")

    def _format_code_block(self, tool_input):
        """Normalize tool input into a well-formed ```python fenced block."""
        stripped_tool_input = tool_input.strip()

        if stripped_tool_input.startswith(CODE_BLOCK_START_TAG) and stripped_tool_input.endswith(CODE_BLOCK_TAG):
            # Already a python fence; just ensure a newline after the opening tag.
            if not stripped_tool_input.startswith(CODE_BLOCK_START_TAG + '\n'):
                stripped_tool_input = CODE_BLOCK_START_TAG + '\n' + stripped_tool_input[len(CODE_BLOCK_START_TAG):] + \
                                      '\n'
            formatted_code = stripped_tool_input
        elif stripped_tool_input.startswith(CODE_BLOCK_TAG) and not stripped_tool_input.startswith(
                CODE_BLOCK_START_TAG) and stripped_tool_input.endswith(CODE_BLOCK_TAG):
            # Plain ``` fence: upgrade the opening tag to ```python.
            formatted_code = CODE_BLOCK_START_TAG + '\n' + stripped_tool_input[len(CODE_BLOCK_TAG):] + '\n'
        else:
            # Bare code: wrap it in a fresh fence.
            formatted_code = CODE_BLOCK_START_TAG + '\n' + stripped_tool_input + '\n' + CODE_BLOCK_TAG + '\n'

        return formatted_code.encode("utf-8").decode("utf-8")

    def _process_sandbox_output(self, output: str):
        """Function to process the result containing STDERR.

        Outputs longer than 1000 characters are trimmed to roughly the first
        and last 500 characters (on line boundaries) joined by "......".
        """
        if len(output) <= 1000:
            return output

        logger.info("Output contains error, original message is over 1000, trim it for response. ori output: \n{}".
                    format(output))
        rows = output.split("\n")
        # Get the first 500 characters, respecting line boundaries
        top_segment = []
        length = 0
        for sub_p in rows:
            if length + len(sub_p) > 500:
                break
            top_segment.append(sub_p)
            length += len(sub_p)

        # Get the last 500 characters, respecting line boundaries
        bottom_segment = []
        length = 0
        for sub_p in reversed(rows):
            if length + len(sub_p) > 500:
                break
            bottom_segment.insert(0, sub_p)
            length += len(sub_p)

        # Combine the segments with "......" in between
        timed_output = "\n".join(top_segment + ["......"] + bottom_segment)

        return timed_output

    def _get_output_files(self, tool_response) -> list[MediaFile]:
        """Collect generated files / image outputs from a sandbox response."""
        output_files = []

        if isinstance(tool_response, PythonSandBoxToolResponse) and isinstance(tool_response.raw_output, RunCodeOutput):
            raw_output = tool_response.raw_output

            # Only harvest files from complete, successful runs.
            if raw_output.code == 0 and not raw_output.data.is_partial:
                result_data = raw_output.data.result

                # TODO confirm if we still need output and format
                if len(result_data.new_generated_files) > 0:
                    output_files.extend([MediaFile(tos_path=file.download_link) for file in
                                         result_data.new_generated_files])

                if len(result_data.code_output_result) > 0:
                    output_files.extend(
                        [MediaFile(tos_path=image.content) for image in result_data.code_output_result
                         if image.type == 'image'])

        return output_files

    def _replace_csv_path(self, input_string):
        """Replace concrete pd.read_csv paths with a placeholder path."""
        pattern = r'pd\.read_csv\(["\'](.*\.csv)["\']\)'
        replacement = "pd.read_csv('/path/to/your/dataset')"
        updated_string = re.sub(pattern, replacement, input_string)
        return updated_string

    @staticmethod
    def create_agent_response(formatted_output, output_files, raw_output):
        """Build the streaming AgentResponse payload for one step."""
        return AgentResponse(output_text=formatted_output, output_files=output_files, raw_output_text=raw_output)
|
| 299 |
+
|
src/infiagent/conversation_sessions/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .code_interpreter_session import CodeInterpreterSession
|
src/infiagent/conversation_sessions/code_interpreter_session.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import os
|
| 3 |
+
import time
|
| 4 |
+
from typing import Any, Dict, Union
|
| 5 |
+
|
| 6 |
+
from werkzeug.datastructures import FileStorage
|
| 7 |
+
|
| 8 |
+
from ..agent import BaseAgent
|
| 9 |
+
from ..agent.react import AsyncReactAgent
|
| 10 |
+
from ..schemas import AgentRequest, MediaFile, Message, RoleType
|
| 11 |
+
from ..utils import generate_random_string, get_logger, get_model_config_path
|
| 12 |
+
|
| 13 |
+
logger = get_logger()
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class CodeInterpreterSession:
    """One code-interpreter conversation: an agent plus its sandbox, message
    history, and uploaded/produced files.

    Instances should be built via the async ``create`` factory, which
    provisions the sandbox and agent before ``__init__`` runs.
    """

    def __init__(
            self,
            session_id: Union[None, str] = None,
            model_name: Union[None, str] = "openai",
            config_path: Union[None, str] = None,
            agent: AsyncReactAgent = None,
            **kwargs):
        self.session_id = session_id
        self.config_path = config_path
        self.input_files = []
        self.output_files = []
        self.messages = []
        self.agent = agent
        self.llm_model_name = self.agent.llm.model_name

        # FIX: the original format string had 3 placeholders but 4 arguments,
        # silently dropping the session id from the log line.
        logger.info("Use model {} and llm {} in config {} for conversation {}"
                    .format(model_name, self.llm_model_name, self.config_path, self.session_id))

    @classmethod
    async def create(cls,
                     model_name: Union[None, str] = "openai",
                     config_path: Union[None, str] = None,
                     **kwargs: Dict[str, Any]):
        """Async factory: resolve config, provision a sandbox, build the agent."""
        if config_path is None:
            config_path = get_model_config_path(model_name)
        logger.info(f"Use Config Path: {config_path}")

        # The random string doubles as both sandbox id and session id.
        sandbox_id = generate_random_string(12)

        # setup agent
        agent = await BaseAgent.async_from_config_path_and_kwargs(config_path, **kwargs)
        await agent.plugins_map["python_code_sandbox"].set_sandbox_id(sandbox_id)

        return cls(session_id=sandbox_id,
                   model_name=model_name,
                   config_path=config_path,
                   agent=agent)

    async def upload_to_sandbox(self, file: Union[str, FileStorage]):
        """Sync a user file into the sandbox and record it in the history."""
        dst_path = await self.agent.sync_to_sandbox(file)
        message = f'User uploaded the following files: {dst_path}\n'
        logging.info(f"The file path {file} has been synced to sandbox with file path {dst_path}")
        self.messages.append(Message(RoleType.System, message))
        self.input_files.append(MediaFile(file_name=os.path.basename(dst_path), sandbox_path=dst_path))

    async def chat(self, user_messages, input_files=None):
        """Run one agent turn, streaming intermediate AgentResponses."""
        start_time = time.time()

        self.messages.extend(user_messages)
        agent_request = AgentRequest(
            messages=self.messages,
            input_files=self.input_files,
            sandbox_id=self.session_id
        )
        logger.info(f"Agent request: {agent_request.__dict__}")

        async for agent_response in self.agent.async_run(agent_request):
            logger.info(f"Agent response:\n{agent_response.output_text}")
            # Intermediate outputs are folded back into the history so the
            # next turn's prompt sees them.
            self.messages.append(Message(RoleType.System, agent_response.output_text))
            yield agent_response

        exec_time = time.time()
        logger.info(
            f'Agent Execution Latency: {exec_time - start_time}'
        )

    def __enter__(self):
        # FIX: return self so `with session as s:` binds the session
        # (the original returned None).
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        # No teardown yet; sandbox lifecycle is managed externally.
        pass
|
src/infiagent/exceptions/__init__.py
ADDED
|
File without changes
|
src/infiagent/exceptions/exceptions.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class DependencyException(Exception):
    """Base for failures in external dependencies (DB, sandbox, LLM, plugins)."""
    pass


class InputErrorException(Exception):
    """Base for caller-supplied bad input (invalid config, bad arguments)."""
    pass


class InternalErrorException(Exception):
    """Unexpected internal state; indicates a bug or broken initialization."""
    pass


# NOTE: each subclass below previously carried an identical
# `__init__(self, message, *args)` that only forwarded to super();
# Exception's default constructor already does exactly that, so the
# boilerplate has been dropped with no behavioral change.

class DatabaseException(DependencyException):
    """Database access failed."""


class SandboxException(DependencyException):
    """Code sandbox invocation failed."""


class LLMException(DependencyException):
    """LLM request failed or returned an unusable response."""


class ModelMaxIterationsException(DependencyException):
    """Agent exceeded its maximum reasoning iterations."""


class InvalidConfigException(InputErrorException):
    """Supplied configuration is malformed or incomplete."""


class SandBoxFileUploadException(SandboxException):
    """File upload into the sandbox failed."""


class PluginException(DependencyException):
    """A tool plugin raised an error during execution."""
|
| 46 |
+
|
src/infiagent/llm/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .client.openai import *
|
| 2 |
+
from .client.azure_openai import *
|
| 3 |
+
from .client.opt import *
|
| 4 |
+
from .client.llama import *
|
| 5 |
+
from .base_llm import *
|
src/infiagent/llm/base_llm.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from abc import ABC
|
| 2 |
+
|
| 3 |
+
from ..exceptions.exceptions import InputErrorException
|
| 4 |
+
from ..schemas import BaseCompletion
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class BaseLLM(ABC):
    """Common interface for LLM clients: a model name, a parameter dict, and
    sync/async completion hooks that concrete backends override."""

    def __init__(self, model_name: str, params: dict, **kwargs):
        # Name-mangled storage keeps subclasses from clobbering these fields.
        self.__model_name = model_name
        self.__params = params

    @classmethod
    async def create(cls, config_data: dict):
        """Async factory hook; concrete clients construct themselves here."""
        pass

    @property
    def model_name(self) -> str:
        """The configured model identifier."""
        return self.__model_name

    @model_name.setter
    def model_name(self, model_name):
        # Reject a None model name outright; everything else is accepted.
        if model_name is None:
            raise InputErrorException("Invalid model_name {}".format(model_name))
        self.__model_name = model_name

    @property
    def params(self) -> dict:
        """Read-only view of the generation parameters."""
        return self.__params

    def completion(self, prompt) -> BaseCompletion:
        """Synchronous completion hook; overridden by concrete clients."""
        pass

    async def async_completion(self, prompt) -> BaseCompletion:
        """Asynchronous completion hook; overridden by concrete clients."""
        pass
|
| 36 |
+
|
src/infiagent/llm/client/__init__.py
ADDED
|
File without changes
|
src/infiagent/llm/client/azure_openai.py
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import logging
|
| 3 |
+
import os
|
| 4 |
+
from abc import ABC
|
| 5 |
+
from typing import Callable, List
|
| 6 |
+
|
| 7 |
+
import openai
|
| 8 |
+
from tenacity import ( # for exponential backoff
|
| 9 |
+
before_sleep_log,
|
| 10 |
+
retry,
|
| 11 |
+
stop_after_attempt,
|
| 12 |
+
wait_random_exponential,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
from ..base_llm import BaseLLM
|
| 16 |
+
from ...schemas import *
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
+
MAX_PROMPT_LENGTH = 7000
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(100), reraise=True,
       before_sleep=before_sleep_log(logger, logging.WARNING))
def chatcompletion_with_backoff(**kwargs):
    """Call ``openai.ChatCompletion.create`` with exponential-backoff retry.

    Retries up to 100 times with 1-10s randomized exponential waits, logging
    a warning before each sleep; the final failure is re-raised to the caller.
    """
    return openai.ChatCompletion.create(**kwargs)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(100), reraise=True,
       before_sleep=before_sleep_log(logger, logging.WARNING))
async def async_chatcompletion_with_backoff(**kwargs):
    """Call ``openai.ChatCompletion.acreate`` with exponential-backoff retry.

    FIX: the original defined and immediately awaited a nested
    ``_internal_coroutine`` wrapper around the API call; tenacity's ``retry``
    supports async callables directly, so the wrapper added only overhead.
    Retry policy matches the synchronous variant: up to 100 attempts, 1-10s
    randomized exponential waits, warning logged before each sleep.
    """
    return await openai.ChatCompletion.acreate(**kwargs)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class AzureOpenAIGPTClient(BaseLLM, ABC):
|
| 39 |
+
"""
|
| 40 |
+
Wrapper class for OpenAI GPT API collections.
|
| 41 |
+
|
| 42 |
+
:param model_name: The name of the model to use.
|
| 43 |
+
:type model_name: str
|
| 44 |
+
:param params: The parameters for the model.
|
| 45 |
+
:type params: AzureOpenAIParamModel
|
| 46 |
+
"""
|
| 47 |
+
|
| 48 |
+
model_name: str
|
| 49 |
+
params: AzureOpenAIParamModel = AzureOpenAIParamModel()
|
| 50 |
+
|
| 51 |
+
def __init__(self, **data):
|
| 52 |
+
super().__init__(**data)
|
| 53 |
+
openai.api_key = os.environ.get("OPENAI_API_KEY", "")
|
| 54 |
+
openai.api_type = "azure"
|
| 55 |
+
openai.api_base = "https://search.bytedance.net/gpt/openapi/online/v2/crawl"
|
| 56 |
+
openai.api_version = "2023-06-01-preview"
|
| 57 |
+
|
| 58 |
+
@classmethod
|
| 59 |
+
async def create(cls, config_data):
|
| 60 |
+
return AzureOpenAIGPTClient(**config_data)
|
| 61 |
+
|
| 62 |
+
def get_model_name(self) -> str:
|
| 63 |
+
return self.model_name
|
| 64 |
+
|
| 65 |
+
def get_model_param(self) -> AzureOpenAIParamModel:
|
| 66 |
+
return self.params
|
| 67 |
+
|
| 68 |
+
def completion(self, prompt: str, **kwargs) -> BaseCompletion:
|
| 69 |
+
"""
|
| 70 |
+
Completion method for OpenAI GPT API.
|
| 71 |
+
|
| 72 |
+
:param prompt: The prompt to use for completion.
|
| 73 |
+
:type prompt: str
|
| 74 |
+
:param kwargs: Additional keyword arguments.
|
| 75 |
+
:type kwargs: dict
|
| 76 |
+
:return: BaseCompletion object.
|
| 77 |
+
:rtype: BaseCompletion
|
| 78 |
+
|
| 79 |
+
"""
|
| 80 |
+
|
| 81 |
+
response = chatcompletion_with_backoff(
|
| 82 |
+
engine=self.get_model_name(), # GPT-4
|
| 83 |
+
messages=[
|
| 84 |
+
{"role": "user", "content": prompt[-MAX_PROMPT_LENGTH:]}
|
| 85 |
+
],
|
| 86 |
+
timeout=1000,
|
| 87 |
+
temperature=self.params.temperature,
|
| 88 |
+
max_tokens=self.params.max_tokens,
|
| 89 |
+
top_p=self.params.top_p,
|
| 90 |
+
frequency_penalty=self.params.frequency_penalty,
|
| 91 |
+
presence_penalty=self.params.presence_penalty,
|
| 92 |
+
**kwargs
|
| 93 |
+
)
|
| 94 |
+
|
| 95 |
+
return BaseCompletion(state="success",
|
| 96 |
+
content=response.choices[0].message["content"],
|
| 97 |
+
prompt_token=response.get("usage", {}).get("prompt_tokens", 0),
|
| 98 |
+
completion_token=response.get("usage", {}).get("completion_tokens", 0))
|
| 99 |
+
|
| 100 |
+
async def async_completion(self, prompt: str, **kwargs) -> BaseCompletion:
|
| 101 |
+
"""
|
| 102 |
+
Completion method for OpenAI GPT API.
|
| 103 |
+
|
| 104 |
+
:param prompt: The prompt to use for completion.
|
| 105 |
+
:type prompt: str
|
| 106 |
+
:param kwargs: Additional keyword arguments.
|
| 107 |
+
:type kwargs: dict
|
| 108 |
+
:return: BaseCompletion object.
|
| 109 |
+
:rtype: BaseCompletion
|
| 110 |
+
|
| 111 |
+
"""
|
| 112 |
+
response = await async_chatcompletion_with_backoff(
|
| 113 |
+
engine=self.get_model_name(),
|
| 114 |
+
messages=[
|
| 115 |
+
{"role": "user", "content": prompt[-MAX_PROMPT_LENGTH:]}
|
| 116 |
+
],
|
| 117 |
+
timeout=1000,
|
| 118 |
+
temperature=self.params.temperature,
|
| 119 |
+
max_tokens=self.params.max_tokens,
|
| 120 |
+
top_p=self.params.top_p,
|
| 121 |
+
frequency_penalty=self.params.frequency_penalty,
|
| 122 |
+
presence_penalty=self.params.presence_penalty,
|
| 123 |
+
**kwargs
|
| 124 |
+
)
|
| 125 |
+
|
| 126 |
+
return BaseCompletion(state="success",
|
| 127 |
+
content=response.choices[0].message["content"],
|
| 128 |
+
prompt_token=response.get("usage", {}).get("prompt_tokens", 0),
|
| 129 |
+
completion_token=response.get("usage", {}).get("completion_tokens", 0))
|
| 130 |
+
|
| 131 |
+
def chat_completion(self, message: List[dict]) -> ChatCompletion:
|
| 132 |
+
"""
|
| 133 |
+
Chat completion method for OpenAI GPT API.
|
| 134 |
+
|
| 135 |
+
:param message: The message to use for completion.
|
| 136 |
+
:type message: List[dict]
|
| 137 |
+
:return: ChatCompletion object.
|
| 138 |
+
:rtype: ChatCompletion
|
| 139 |
+
"""
|
| 140 |
+
try:
|
| 141 |
+
response = openai.ChatCompletion.create(
|
| 142 |
+
engine=self.get_model_name(), # GPT-4
|
| 143 |
+
messages=message,
|
| 144 |
+
timeout=1000,
|
| 145 |
+
)
|
| 146 |
+
|
| 147 |
+
return ChatCompletion(
|
| 148 |
+
state="success",
|
| 149 |
+
role=response.choices[0].message["role"],
|
| 150 |
+
content=response.choices[0].message["content"],
|
| 151 |
+
prompt_token=response.get("usage", {}).get("prompt_tokens", 0),
|
| 152 |
+
completion_token=response.get("usage", {}).get("completion_tokens", 0),
|
| 153 |
+
)
|
| 154 |
+
except Exception as exception:
|
| 155 |
+
print("Exception:", exception)
|
| 156 |
+
return ChatCompletion(state="error", content=exception)
|
| 157 |
+
|
| 158 |
+
def stream_chat_completion(self, message: List[dict], **kwargs):
|
| 159 |
+
"""
|
| 160 |
+
Stream output chat completion for OpenAI GPT API.
|
| 161 |
+
|
| 162 |
+
:param message: The message (scratchpad) to use for completion. Usually contains json of role and content.
|
| 163 |
+
:type message: List[dict]
|
| 164 |
+
:param kwargs: Additional keyword arguments.
|
| 165 |
+
:type kwargs: dict
|
| 166 |
+
:return: ChatCompletion object.
|
| 167 |
+
:rtype: ChatCompletion
|
| 168 |
+
"""
|
| 169 |
+
try:
|
| 170 |
+
response = openai.ChatCompletion.create(
|
| 171 |
+
engine=self.get_model_name(), # GPT-4
|
| 172 |
+
messages=message,
|
| 173 |
+
timeout=1000,
|
| 174 |
+
**kwargs,
|
| 175 |
+
)
|
| 176 |
+
|
| 177 |
+
role = next(response).choices[0].delta["role"]
|
| 178 |
+
messages = []
|
| 179 |
+
## TODO: Calculate prompt_token and for stream mode
|
| 180 |
+
for resp in response:
|
| 181 |
+
messages.append(resp.choices[0].delta.get("content", ""))
|
| 182 |
+
yield ChatCompletion(
|
| 183 |
+
state="success",
|
| 184 |
+
role=role,
|
| 185 |
+
content=messages[-1],
|
| 186 |
+
prompt_token=0,
|
| 187 |
+
completion_token=0,
|
| 188 |
+
)
|
| 189 |
+
except Exception as exception:
|
| 190 |
+
print("Exception:", exception)
|
| 191 |
+
return ChatCompletion(state="error", content=exception)
|
| 192 |
+
|
| 193 |
+
    def function_chat_completion(
        self,
        message: List[dict],
        function_map: Dict[str, Callable],
        function_schema: List[Dict],
    ) -> ChatCompletionWithHistory:
        """
        Chat completion with OpenAI function-calling support.

        Sends the scratchpad once; if the model requests a function call,
        runs the mapped local callable and issues a second request so the
        model can incorporate the tool output into its final answer.

        :param message: The message to use for completion. Mutated in place:
            the assistant turn, the function result turn, and the final
            assistant turn are appended as the exchange proceeds.
        :type message: List[dict]
        :param function_map: The function map to use for completion
            (function name -> local callable).
        :type function_map: Dict[str, Callable]
        :param function_schema: The function schema to use for completion.
        :type function_schema: List[Dict]
        :return: ChatCompletionWithHistory object; on failure ``state`` is
            "error" and ``content`` is the exception text.
        :rtype: ChatCompletionWithHistory
        """
        # One schema entry is expected per mapped callable.
        assert len(function_schema) == len(function_map)
        try:
            response = openai.ChatCompletion.create(
                engine=self.get_model_name(),  # GPT-4
                messages=message,
                functions=function_schema,
                timeout=1000,
            )
            # response = openai.ChatCompletion.create(
            #     n=self.params.n,
            #     model=self.model_name,
            #     messages=message,
            #     functions=function_schema,
            #     temperature=self.params.temperature,
            #     max_tokens=self.params.max_tokens,
            #     top_p=self.params.top_p,
            #     frequency_penalty=self.params.frequency_penalty,
            #     presence_penalty=self.params.presence_penalty,
            # )
            response_message = response.choices[0]["message"]

            if response_message.get("function_call"):
                # Model asked for a tool: resolve and invoke the local
                # callable with the JSON-decoded arguments.
                function_name = response_message["function_call"]["name"]
                fuction_to_call = function_map[function_name]
                function_args = json.loads(
                    response_message["function_call"]["arguments"]
                )
                function_response = fuction_to_call(**function_args)

                # Postprocess function response: plain strings carry no
                # cost/token accounting; AgentOutput does.
                if isinstance(function_response, str):
                    plugin_cost = 0
                    plugin_token = 0
                elif isinstance(function_response, AgentOutput):
                    plugin_cost = function_response.cost
                    plugin_token = function_response.token_usage
                    function_response = function_response.output
                else:
                    raise Exception(
                        "Invalid tool response type. Must be on of [AgentOutput, str]"
                    )

                # Record the assistant's function_call turn, then the tool
                # result, before asking the model for the final answer.
                message.append(dict(response_message))
                message.append(
                    {
                        "role": "function",
                        "name": function_name,
                        "content": function_response,
                    }
                )
                second_response = openai.ChatCompletion.create(
                    model=self.get_model_name(),
                    messages=message,
                )
                message.append(dict(second_response.choices[0].message))
                # Token counts sum both API round trips.
                return ChatCompletionWithHistory(
                    state="success",
                    role=second_response.choices[0].message["role"],
                    content=second_response.choices[0].message["content"],
                    prompt_token=response.get("usage", {}).get("prompt_tokens", 0)
                    + second_response.get("usage", {}).get("prompt_tokens", 0),
                    completion_token=response.get("usage", {}).get(
                        "completion_tokens", 0
                    )
                    + second_response.get("usage", {}).get("completion_tokens", 0),
                    message_scratchpad=message,
                    plugin_cost=plugin_cost,
                    plugin_token=plugin_token,
                )
            else:
                # No tool requested: return the first response directly.
                message.append(dict(response_message))
                return ChatCompletionWithHistory(
                    state="success",
                    role=response.choices[0].message["role"],
                    content=response.choices[0].message["content"],
                    prompt_token=response.get("usage", {}).get("prompt_tokens", 0),
                    completion_token=response.get("usage", {}).get(
                        "completion_tokens", 0
                    ),
                    message_scratchpad=message,
                )

        except Exception as exception:
            print("Exception:", exception)
            return ChatCompletionWithHistory(state="error", content=str(exception))
|
| 296 |
+
|
| 297 |
+
    def function_chat_stream_completion(
        self,
        message: List[dict],
        function_map: Dict[str, Callable],
        function_schema: List[Dict],
    ) -> ChatCompletionWithHistory:
        """
        Streaming chat completion with function-calling support.

        Generator: yields ``(type, ChatCompletionWithHistory)`` tuples where
        ``type`` is either "content" (plain text deltas) or "function_call"
        (argument-JSON deltas). The mapped callables in ``function_map`` are
        not invoked here — only the model's streamed request is relayed.

        :param message: The message scratchpad to send.
        :type message: List[dict]
        :param function_map: Function name -> local callable (used only for
            the schema/map consistency assertion here).
        :type function_map: Dict[str, Callable]
        :param function_schema: Function schemas advertised to the model.
        :type function_schema: List[Dict]
        :raises Exception: re-raises any API/stream failure after logging.
        """
        assert len(function_schema) == len(function_map)
        try:
            response = openai.ChatCompletion.create(
                n=self.params.n,
                model=self.get_model_name(),
                messages=message,
                functions=function_schema,
                temperature=self.params.temperature,
                max_tokens=self.params.max_tokens,
                top_p=self.params.top_p,
                frequency_penalty=self.params.frequency_penalty,
                presence_penalty=self.params.presence_penalty,
                stream=True,
            )
            # First chunk fixes the role and tells us whether the model is
            # streaming plain content or a function call (content is None
            # when a function_call is being produced).
            tmp = next(response)
            role = tmp.choices[0].delta["role"]
            _type = (
                "function_call"
                if tmp.choices[0].delta["content"] is None
                else "content"
            )
            if _type == "function_call":
                # Emit the opening of the call JSON; subsequent deltas
                # stream the "arguments" value.
                name = tmp.choices[0].delta["function_call"]["name"]
                yield _type, ChatCompletionWithHistory(
                    state="success",
                    role=role,
                    content="{" + f'"name":"{name}", "arguments":',
                    message_scratchpad=message,
                )
            for resp in response:
                # print(resp)
                content = resp.choices[0].delta.get(_type, "")
                if isinstance(content, dict):
                    # function_call deltas arrive as dicts; forward only
                    # the streamed arguments fragment.
                    content = content["arguments"]
                yield _type, ChatCompletionWithHistory(
                    state="success",
                    role=role,
                    content=content,
                    message_scratchpad=message,
                )

        except Exception as e:
            logger.error(f"Failed to get response {str(e)}", exc_info=True)
            raise e
|
src/infiagent/llm/client/llama.py
ADDED
|
@@ -0,0 +1,377 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import logging
|
| 3 |
+
import os
|
| 4 |
+
from abc import ABC
|
| 5 |
+
from typing import Callable, List
|
| 6 |
+
|
| 7 |
+
import openai
|
| 8 |
+
from tenacity import ( # for exponential backoff
|
| 9 |
+
before_sleep_log,
|
| 10 |
+
retry,
|
| 11 |
+
stop_after_attempt,
|
| 12 |
+
wait_random_exponential,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
from ..base_llm import BaseLLM
|
| 16 |
+
from ...schemas import *
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
+
MAX_PROMPT_LENGTH = 4096
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@retry(
    wait=wait_random_exponential(min=1, max=10),
    stop=stop_after_attempt(5),
    reraise=True,
    before_sleep=before_sleep_log(logger, logging.WARNING),
)
def chatcompletion_with_backoff(**kwargs):
    """Call ``openai.ChatCompletion.create`` with exponential-backoff retries.

    Up to 5 attempts with random exponential waits of 1-10s; the final
    exception is re-raised, and each back-off is logged at WARNING.
    """
    create = openai.ChatCompletion.create
    return create(**kwargs)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@retry(
    wait=wait_random_exponential(min=1, max=10),
    stop=stop_after_attempt(5),
    reraise=True,
    before_sleep=before_sleep_log(logger, logging.WARNING),
)
async def async_chatcompletion_with_backoff(**kwargs):
    """Async ``openai.ChatCompletion.acreate`` with exponential-backoff retries.

    Up to 5 attempts with random exponential waits of 1-10s; the final
    exception is re-raised, and each back-off is logged at WARNING.

    Simplification: the original defined and awaited a redundant inner
    coroutine (``_internal_coroutine``) that only forwarded the awaited
    call — awaiting the API call directly is behaviorally identical and
    drops the extra frame per invocation.

    :param kwargs: Forwarded verbatim to ``openai.ChatCompletion.acreate``.
    :return: The API response object.
    """
    return await openai.ChatCompletion.acreate(**kwargs)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class LlamaOpenAIClient(BaseLLM, ABC):
    """
    Wrapper class for a Llama model served behind an OpenAI-compatible API.

    Requests go through the ``openai`` client, redirected in ``__init__``
    to a locally hosted endpoint.

    :param model_name: The name of the model to use.
    :type model_name: str
    :param params: The parameters for the model.
    :type params: LlamaParamModel
    """

    # Served model name, passed as ``model=`` on each request.
    model_name: str
    # Sampling/decoding parameters; defaults come from LlamaParamModel.
    params: LlamaParamModel = LlamaParamModel()

    def __init__(self, **data):
        super().__init__(**data)
        # NOTE(review): this mutates *module-global* openai configuration,
        # and the base URL is hard-coded to a local server on port 9729 —
        # every other openai client in the process is redirected too.
        # Consider making the endpoint configurable; confirm no other
        # OpenAI-backed client runs in the same process.
        openai.api_key = ""
        openai.api_base = "http://0.0.0.0:9729/v1"

    @classmethod
    async def create(cls, config_data):
        """Async factory: build a client from a config mapping."""
        return LlamaOpenAIClient(**config_data)

    def get_model_name(self) -> str:
        """Return the configured model name."""
        return self.model_name

    def get_model_param(self) -> LlamaParamModel:
        """Return the model parameter object."""
        return self.params
|
| 65 |
+
|
| 66 |
+
    def completion(self, prompt: str, **kwargs) -> BaseCompletion:
        """
        Completion method for OpenAI GPT API.

        :param prompt: The prompt to use for completion. Only the last
            ``MAX_PROMPT_LENGTH`` *characters* are sent — a character cut,
            not a token count, so the head of a long prompt is silently
            dropped.
        :type prompt: str
        :param kwargs: Additional keyword arguments forwarded to the API call.
        :type kwargs: dict
        :return: BaseCompletion object.
        :rtype: BaseCompletion
        """
        # Retried with exponential backoff; no try/except here, so after
        # retries are exhausted the exception propagates to the caller.
        response = chatcompletion_with_backoff(
            model=self.model_name,
            messages=[
                {"role": "user", "content": prompt[-MAX_PROMPT_LENGTH:]}
            ],
            timeout=1000,
            # temperature=self.params.temperature,
            # max_tokens=self.params.max_tokens,
            # top_p=self.params.top_p,
            # frequency_penalty=self.params.frequency_penalty,
            # presence_penalty=self.params.presence_penalty,
            # stop=["<|im_end|>", "<|endoftext|>"],
            **kwargs
        )

        return BaseCompletion(state="success",
                              content=response.choices[0].message["content"],
                              prompt_token=response.get("usage", {}).get("prompt_tokens", 0),
                              completion_token=response.get("usage", {}).get("completion_tokens", 0))
|
| 96 |
+
|
| 97 |
+
    async def async_completion(self, prompt: str, **kwargs) -> BaseCompletion:
        """
        Async completion method for OpenAI GPT API.

        :param prompt: The prompt to use for completion. Only the last
            ``MAX_PROMPT_LENGTH`` *characters* are sent (character cut,
            not a token count).
        :type prompt: str
        :param kwargs: Additional keyword arguments forwarded to the API call.
        :type kwargs: dict
        :return: BaseCompletion object.
        :rtype: BaseCompletion
        """
        # Retried with exponential backoff; exceptions propagate after
        # the retries are exhausted.
        response = await async_chatcompletion_with_backoff(
            model=self.model_name,
            messages=[
                {"role": "user", "content": prompt[-MAX_PROMPT_LENGTH:]}
            ],
            timeout=1000,
            #temperature=0.2,
            #max_tokens=4096,
            #top_p=0.9,
            #frequency_penalty=self.params.frequency_penalty,
            #presence_penalty=self.params.presence_penalty,
            # stop=["<|im_end|>", "<|endoftext|>"],
            **kwargs
        )

        return BaseCompletion(state="success",
                              content=response.choices[0].message["content"],
                              prompt_token=response.get("usage", {}).get("prompt_tokens", 0),
                              completion_token=response.get("usage", {}).get("completion_tokens", 0))
|
| 128 |
+
|
| 129 |
+
def chat_completion(self, message: List[dict]) -> ChatCompletion:
|
| 130 |
+
"""
|
| 131 |
+
Chat completion method for OpenAI GPT API.
|
| 132 |
+
|
| 133 |
+
:param message: The message to use for completion.
|
| 134 |
+
:type message: List[dict]
|
| 135 |
+
:return: ChatCompletion object.
|
| 136 |
+
:rtype: ChatCompletion
|
| 137 |
+
"""
|
| 138 |
+
try:
|
| 139 |
+
response = openai.ChatCompletion.create(
|
| 140 |
+
n=self.params.n,
|
| 141 |
+
model=self.model_name,
|
| 142 |
+
timeout=1000,
|
| 143 |
+
messages=message,
|
| 144 |
+
temperature=self.params.temperature,
|
| 145 |
+
max_tokens=self.params.max_tokens,
|
| 146 |
+
top_p=self.params.top_p,
|
| 147 |
+
frequency_penalty=self.params.frequency_penalty,
|
| 148 |
+
presence_penalty=self.params.presence_penalty,
|
| 149 |
+
)
|
| 150 |
+
return ChatCompletion(
|
| 151 |
+
state="success",
|
| 152 |
+
role=response.choices[0].message["role"],
|
| 153 |
+
content=response.choices[0].message["content"],
|
| 154 |
+
prompt_token=response.get("usage", {}).get("prompt_tokens", 0),
|
| 155 |
+
completion_token=response.get("usage", {}).get("completion_tokens", 0),
|
| 156 |
+
)
|
| 157 |
+
except Exception as exception:
|
| 158 |
+
print("Exception:", exception)
|
| 159 |
+
return ChatCompletion(state="error", content=exception)
|
| 160 |
+
|
| 161 |
+
def stream_chat_completion(self, message: List[dict], **kwargs):
|
| 162 |
+
"""
|
| 163 |
+
Stream output chat completion for OpenAI GPT API.
|
| 164 |
+
|
| 165 |
+
:param message: The message (scratchpad) to use for completion. Usually contains json of role and content.
|
| 166 |
+
:type message: List[dict]
|
| 167 |
+
:param kwargs: Additional keyword arguments.
|
| 168 |
+
:type kwargs: dict
|
| 169 |
+
:return: ChatCompletion object.
|
| 170 |
+
:rtype: ChatCompletion
|
| 171 |
+
"""
|
| 172 |
+
try:
|
| 173 |
+
# response = openai.ChatCompletion.create(
|
| 174 |
+
# engine=self.get_model_name(), # GPT-4
|
| 175 |
+
# messages=message,
|
| 176 |
+
# timeout=1000,
|
| 177 |
+
# **kwargs,
|
| 178 |
+
# )
|
| 179 |
+
response = openai.ChatCompletion.create(
|
| 180 |
+
n=self.params.n,
|
| 181 |
+
model=self.model_name,
|
| 182 |
+
messages=message,
|
| 183 |
+
temperature=self.params.temperature,
|
| 184 |
+
max_tokens=self.params.max_tokens,
|
| 185 |
+
top_p=self.params.top_p,
|
| 186 |
+
frequency_penalty=self.params.frequency_penalty,
|
| 187 |
+
presence_penalty=self.params.presence_penalty,
|
| 188 |
+
stream=True,
|
| 189 |
+
**kwargs
|
| 190 |
+
)
|
| 191 |
+
role = next(response).choices[0].delta["role"]
|
| 192 |
+
messages = []
|
| 193 |
+
## TODO: Calculate prompt_token and for stream mode
|
| 194 |
+
for resp in response:
|
| 195 |
+
messages.append(resp.choices[0].delta.get("content", ""))
|
| 196 |
+
yield ChatCompletion(
|
| 197 |
+
state="success",
|
| 198 |
+
role=role,
|
| 199 |
+
content=messages[-1],
|
| 200 |
+
prompt_token=0,
|
| 201 |
+
completion_token=0,
|
| 202 |
+
)
|
| 203 |
+
except Exception as exception:
|
| 204 |
+
print("Exception:", exception)
|
| 205 |
+
return ChatCompletion(state="error", content=exception)
|
| 206 |
+
|
| 207 |
+
    def function_chat_completion(
        self,
        message: List[dict],
        function_map: Dict[str, Callable],
        function_schema: List[Dict],
    ) -> ChatCompletionWithHistory:
        """
        Chat completion with OpenAI function-calling support.

        Sends the scratchpad once; if the model requests a function call,
        runs the mapped local callable and issues a second request so the
        model can incorporate the tool output into its final answer.

        :param message: The message to use for completion. Mutated in place:
            assistant/function turns are appended as the exchange proceeds.
        :type message: List[dict]
        :param function_map: The function map to use for completion
            (function name -> local callable).
        :type function_map: Dict[str, Callable]
        :param function_schema: The function schema to use for completion.
        :type function_schema: List[Dict]
        :return: ChatCompletionWithHistory object; on failure ``state`` is
            "error" and ``content`` is the exception text.
        :rtype: ChatCompletionWithHistory
        """
        # One schema entry is expected per mapped callable.
        assert len(function_schema) == len(function_map)
        try:
            # response = openai.ChatCompletion.create(
            #     engine=self.get_model_name(),  # GPT-4
            #     messages=message,
            #     functions=function_schema,
            #     timeout=1000,
            # )
            response = openai.ChatCompletion.create(
                n=self.params.n,
                model=self.model_name,
                messages=message,
                functions=function_schema,
                temperature=self.params.temperature,
                max_tokens=self.params.max_tokens,
                top_p=self.params.top_p,
                frequency_penalty=self.params.frequency_penalty,
                presence_penalty=self.params.presence_penalty,
            )
            response_message = response.choices[0]["message"]

            if response_message.get("function_call"):
                # Model asked for a tool: resolve and invoke the local
                # callable with the JSON-decoded arguments.
                function_name = response_message["function_call"]["name"]
                fuction_to_call = function_map[function_name]
                function_args = json.loads(
                    response_message["function_call"]["arguments"]
                )
                function_response = fuction_to_call(**function_args)

                # Postprocess function response: plain strings carry no
                # cost/token accounting; AgentOutput does.
                if isinstance(function_response, str):
                    plugin_cost = 0
                    plugin_token = 0
                elif isinstance(function_response, AgentOutput):
                    plugin_cost = function_response.cost
                    plugin_token = function_response.token_usage
                    function_response = function_response.output
                else:
                    raise Exception(
                        "Invalid tool response type. Must be on of [AgentOutput, str]"
                    )

                # Record the assistant's function_call turn, then the tool
                # result, before asking the model for the final answer.
                message.append(dict(response_message))
                message.append(
                    {
                        "role": "function",
                        "name": function_name,
                        "content": function_response,
                    }
                )
                second_response = openai.ChatCompletion.create(
                    model=self.get_model_name(),
                    messages=message,
                )
                message.append(dict(second_response.choices[0].message))
                # Token counts sum both API round trips.
                return ChatCompletionWithHistory(
                    state="success",
                    role=second_response.choices[0].message["role"],
                    content=second_response.choices[0].message["content"],
                    prompt_token=response.get("usage", {}).get("prompt_tokens", 0)
                    + second_response.get("usage", {}).get("prompt_tokens", 0),
                    completion_token=response.get("usage", {}).get(
                        "completion_tokens", 0
                    )
                    + second_response.get("usage", {}).get("completion_tokens", 0),
                    message_scratchpad=message,
                    plugin_cost=plugin_cost,
                    plugin_token=plugin_token,
                )
            else:
                # No tool requested: return the first response directly.
                message.append(dict(response_message))
                return ChatCompletionWithHistory(
                    state="success",
                    role=response.choices[0].message["role"],
                    content=response.choices[0].message["content"],
                    prompt_token=response.get("usage", {}).get("prompt_tokens", 0),
                    completion_token=response.get("usage", {}).get(
                        "completion_tokens", 0
                    ),
                    message_scratchpad=message,
                )

        except Exception as exception:
            print("Exception:", exception)
            return ChatCompletionWithHistory(state="error", content=str(exception))
|
| 310 |
+
|
| 311 |
+
    def function_chat_stream_completion(
        self,
        message: List[dict],
        function_map: Dict[str, Callable],
        function_schema: List[Dict],
    ) -> ChatCompletionWithHistory:
        """
        Streaming chat completion with function-calling support.

        Generator: yields ``(type, ChatCompletionWithHistory)`` tuples where
        ``type`` is either "content" (plain text deltas) or "function_call"
        (argument-JSON deltas). The callables in ``function_map`` are not
        invoked here — only the model's streamed request is relayed; the
        commented-out block at the end sketches a possible second round.

        :param message: The message scratchpad to send.
        :type message: List[dict]
        :param function_map: Function name -> local callable (used only for
            the schema/map consistency assertion here).
        :type function_map: Dict[str, Callable]
        :param function_schema: Function schemas advertised to the model.
        :type function_schema: List[Dict]
        :raises Exception: re-raises any API/stream failure after logging.
        """
        assert len(function_schema) == len(function_map)
        try:
            response = openai.ChatCompletion.create(
                n=self.params.n,
                model=self.get_model_name(),
                messages=message,
                functions=function_schema,
                temperature=self.params.temperature,
                max_tokens=self.params.max_tokens,
                top_p=self.params.top_p,
                frequency_penalty=self.params.frequency_penalty,
                presence_penalty=self.params.presence_penalty,
                stream=True,
            )
            # First chunk fixes the role and tells us whether the model is
            # streaming plain content or a function call (content is None
            # when a function_call is being produced).
            tmp = next(response)
            role = tmp.choices[0].delta["role"]
            _type = (
                "function_call"
                if tmp.choices[0].delta["content"] is None
                else "content"
            )
            if _type == "function_call":
                # Emit the opening of the call JSON; subsequent deltas
                # stream the "arguments" value.
                name = tmp.choices[0].delta["function_call"]["name"]
                yield _type, ChatCompletionWithHistory(
                    state="success",
                    role=role,
                    content="{" + f'"name":"{name}", "arguments":',
                    message_scratchpad=message,
                )
            for resp in response:
                # print(resp)
                content = resp.choices[0].delta.get(_type, "")
                if isinstance(content, dict):
                    # function_call deltas arrive as dicts; forward only
                    # the streamed arguments fragment.
                    content = content["arguments"]
                yield _type, ChatCompletionWithHistory(
                    state="success",
                    role=role,
                    content=content,
                    message_scratchpad=message,
                )

            # result = ''.join(messages)
            # if _type == "function_call":
            #     result = json.loads(result)
            #     function_name = result["name"]
            #     fuction_to_call = function_map[function_name]
            #     function_args = result["arguments"]
            #     function_response = fuction_to_call(**function_args)
            #
            #     # Postprocess function response
            #     if isinstance(function_response, AgentOutput):
            #         function_response = function_response.output
            #     message.append({"role": "function",
            #                     "name": function_name,
            #                     "content": function_response})
            #     second_response = self.function_chat_stream_completion(message=message,function_map=function_map,function_schema=function_schema)
            #     message.append(dict(second_response.choices[0].message))

        except Exception as e:
            logger.error(f"Failed to get response {str(e)}", exc_info=True)
            raise e
|
src/infiagent/llm/client/openai.py
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
from abc import ABC
|
| 4 |
+
from typing import Callable, List
|
| 5 |
+
|
| 6 |
+
import openai
|
| 7 |
+
|
| 8 |
+
from ..base_llm import BaseLLM
|
| 9 |
+
from ...schemas import *
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class OpenAIGPTClient(BaseLLM, ABC):
    """
    Wrapper class for OpenAI GPT API collections.

    :param model_name: The name of the model to use.
    :type model_name: str
    :param params: The parameters for the model.
    :type params: OpenAIParamModel
    """
    # Model/deployment name passed on each API request.
    model_name: str
    # Sampling/decoding parameters; defaults come from OpenAIParamModel.
    params: OpenAIParamModel = OpenAIParamModel()

    def __init__(self, **data):
        super().__init__(**data)
        # NOTE(review): mutates module-global openai config; key comes from
        # the OPENAI_API_KEY environment variable (empty string if unset —
        # requests will then fail at call time, not here).
        openai.api_key = os.environ.get("OPENAI_API_KEY", "")

    @classmethod
    async def create(cls, config_data):
        """Async factory: build a client from a config mapping."""
        return OpenAIGPTClient(**config_data)

    def get_model_name(self) -> str:
        """Return the configured model name."""
        return self.model_name

    def get_model_param(self) -> OpenAIParamModel:
        """Return the model parameter object."""
        return self.params
|
| 37 |
+
|
| 38 |
+
def completion(self, prompt: str, **kwargs) -> BaseCompletion:
|
| 39 |
+
"""
|
| 40 |
+
Completion method for OpenAI GPT API.
|
| 41 |
+
|
| 42 |
+
:param prompt: The prompt to use for completion.
|
| 43 |
+
:type prompt: str
|
| 44 |
+
:param kwargs: Additional keyword arguments.
|
| 45 |
+
:type kwargs: dict
|
| 46 |
+
:return: BaseCompletion object.
|
| 47 |
+
:rtype: BaseCompletion
|
| 48 |
+
|
| 49 |
+
"""
|
| 50 |
+
try:
|
| 51 |
+
#TODO any full parameters support
|
| 52 |
+
response = openai.ChatCompletion.create(
|
| 53 |
+
# n=self.params['n'],
|
| 54 |
+
engine=self.model_name,
|
| 55 |
+
messages=[{"role": "user", "content": prompt}],
|
| 56 |
+
temperature=self.params['temperature'],
|
| 57 |
+
max_tokens=self.params['max_tokens'],
|
| 58 |
+
top_p=self.params['top_p'],
|
| 59 |
+
# frequency_penalty=self.params.frequency_penalty,
|
| 60 |
+
# presence_penalty=self.params.presence_penalty,
|
| 61 |
+
**kwargs
|
| 62 |
+
)
|
| 63 |
+
return BaseCompletion(state="success",
|
| 64 |
+
content=response.choices[0].message["content"],
|
| 65 |
+
prompt_token=response.get("usage", {}).get("prompt_tokens", 0),
|
| 66 |
+
completion_token=response.get("usage", {}).get("completion_tokens", 0))
|
| 67 |
+
except Exception as exception:
|
| 68 |
+
print("Exception:", exception)
|
| 69 |
+
return BaseCompletion(state="error", content=exception)
|
| 70 |
+
|
| 71 |
+
async def async_completion(self, prompt: str, **kwargs) -> BaseCompletion:
|
| 72 |
+
"""
|
| 73 |
+
Async Completion method for OpenAI GPT API.
|
| 74 |
+
|
| 75 |
+
:param prompt: The prompt to use for completion.
|
| 76 |
+
:type prompt: str
|
| 77 |
+
:param kwargs: Additional keyword arguments.
|
| 78 |
+
:type kwargs: dict
|
| 79 |
+
:return: BaseCompletion object.
|
| 80 |
+
:rtype: BaseCompletion
|
| 81 |
+
|
| 82 |
+
"""
|
| 83 |
+
try:
|
| 84 |
+
response = await openai.ChatCompletion.acreate(
|
| 85 |
+
model=self.model_name,
|
| 86 |
+
messages=[{"role": "user", "content": prompt}],
|
| 87 |
+
temperature=self.params['temperature'],
|
| 88 |
+
max_tokens=self.params['max_tokens'],
|
| 89 |
+
top_p=self.params['top_p'],
|
| 90 |
+
# frequency_penalty=self.params.frequency_penalty,
|
| 91 |
+
# presence_penalty=self.params.presence_penalty,
|
| 92 |
+
**kwargs
|
| 93 |
+
)
|
| 94 |
+
return BaseCompletion(state="success",
|
| 95 |
+
content=response.choices[0].message["content"],
|
| 96 |
+
prompt_token=response.get("usage", {}).get("prompt_tokens", 0),
|
| 97 |
+
completion_token=response.get("usage", {}).get("completion_tokens", 0))
|
| 98 |
+
except Exception as exception:
|
| 99 |
+
print("Exception:", exception)
|
| 100 |
+
return BaseCompletion(state="error", content=exception)
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def chat_completion(self, message: List[dict]) -> ChatCompletion:
|
| 104 |
+
"""
|
| 105 |
+
Chat completion method for OpenAI GPT API.
|
| 106 |
+
|
| 107 |
+
:param message: The message to use for completion.
|
| 108 |
+
:type message: List[dict]
|
| 109 |
+
:return: ChatCompletion object.
|
| 110 |
+
:rtype: ChatCompletion
|
| 111 |
+
"""
|
| 112 |
+
try:
|
| 113 |
+
response = openai.ChatCompletion.create(
|
| 114 |
+
n=self.params.n,
|
| 115 |
+
model=self.model_name,
|
| 116 |
+
messages=message,
|
| 117 |
+
temperature=self.params.temperature,
|
| 118 |
+
max_tokens=self.params.max_tokens,
|
| 119 |
+
top_p=self.params.top_p,
|
| 120 |
+
frequency_penalty=self.params.frequency_penalty,
|
| 121 |
+
presence_penalty=self.params.presence_penalty,
|
| 122 |
+
)
|
| 123 |
+
return ChatCompletion(state="success",
|
| 124 |
+
role=response.choices[0].message["role"],
|
| 125 |
+
content=response.choices[0].message["content"],
|
| 126 |
+
prompt_token=response.get("usage", {}).get("prompt_tokens", 0),
|
| 127 |
+
completion_token=response.get("usage", {}).get("completion_tokens", 0))
|
| 128 |
+
except Exception as exception:
|
| 129 |
+
print("Exception:", exception)
|
| 130 |
+
return ChatCompletion(state="error", content=exception)
|
| 131 |
+
|
| 132 |
+
def stream_chat_completion(self, message: List[dict], **kwargs):
|
| 133 |
+
"""
|
| 134 |
+
Stream output chat completion for OpenAI GPT API.
|
| 135 |
+
|
| 136 |
+
:param message: The message (scratchpad) to use for completion. Usually contains json of role and content.
|
| 137 |
+
:type message: List[dict]
|
| 138 |
+
:param kwargs: Additional keyword arguments.
|
| 139 |
+
:type kwargs: dict
|
| 140 |
+
:return: ChatCompletion object.
|
| 141 |
+
:rtype: ChatCompletion
|
| 142 |
+
"""
|
| 143 |
+
try:
|
| 144 |
+
response = openai.ChatCompletion.create(
|
| 145 |
+
n=self.params.n,
|
| 146 |
+
model=self.model_name,
|
| 147 |
+
messages=message,
|
| 148 |
+
temperature=self.params.temperature,
|
| 149 |
+
max_tokens=self.params.max_tokens,
|
| 150 |
+
top_p=self.params.top_p,
|
| 151 |
+
frequency_penalty=self.params.frequency_penalty,
|
| 152 |
+
presence_penalty=self.params.presence_penalty,
|
| 153 |
+
stream=True,
|
| 154 |
+
**kwargs
|
| 155 |
+
)
|
| 156 |
+
role = next(response).choices[0].delta["role"]
|
| 157 |
+
messages = []
|
| 158 |
+
## TODO: Calculate prompt_token and for stream mode
|
| 159 |
+
for resp in response:
|
| 160 |
+
messages.append(resp.choices[0].delta.get("content", ""))
|
| 161 |
+
yield ChatCompletion(state="success",
|
| 162 |
+
role=role,
|
| 163 |
+
content=messages[-1],
|
| 164 |
+
prompt_token=0,
|
| 165 |
+
completion_token=0)
|
| 166 |
+
except Exception as exception:
|
| 167 |
+
print("Exception:", exception)
|
| 168 |
+
return ChatCompletion(state="error", content=exception)
|
| 169 |
+
|
| 170 |
+
def function_chat_completion(self, message: List[dict],
                             function_map: Dict[str, Callable],
                             function_schema: List[Dict]) -> ChatCompletionWithHistory:
    """
    Function-calling chat completion for the OpenAI GPT API.

    Requests a completion; if the model asks for a function call, invokes the
    mapped callable, appends the call and its result to ``message`` (mutated
    in place — it doubles as the returned ``message_scratchpad``), then asks
    the model for a follow-up reply.

    :param message: The message to use for completion.
    :type message: List[dict]
    :param function_map: Mapping from function name to the callable to invoke.
    :type function_map: Dict[str, Callable]
    :param function_schema: The function schema to use for completion.
    :type function_schema: List[Dict]
    :return: ChatCompletionWithHistory object.
    :rtype: ChatCompletionWithHistory
    """
    assert len(function_schema) == len(function_map)
    try:
        response = openai.ChatCompletion.create(
            n=self.params.n,
            model=self.model_name,
            messages=message,
            functions=function_schema,
            temperature=self.params.temperature,
            max_tokens=self.params.max_tokens,
            top_p=self.params.top_p,
            frequency_penalty=self.params.frequency_penalty,
            presence_penalty=self.params.presence_penalty,
        )
        response_message = response.choices[0]["message"]

        if response_message.get("function_call"):
            # The model requested a tool call: execute it locally.
            function_name = response_message["function_call"]["name"]
            function_to_call = function_map[function_name]
            function_args = json.loads(response_message["function_call"]["arguments"])
            function_response = function_to_call(**function_args)

            # Postprocess function response: plain strings carry no
            # cost/token accounting, AgentOutput does.
            if isinstance(function_response, str):
                plugin_cost = 0
                plugin_token = 0
            elif isinstance(function_response, AgentOutput):
                plugin_cost = function_response.cost
                plugin_token = function_response.token_usage
                function_response = function_response.output
            else:
                raise Exception("Invalid tool response type. Must be one of [AgentOutput, str]")

            # Record the call and its result, then ask for a follow-up reply.
            message.append(dict(response_message))
            message.append({"role": "function",
                            "name": function_name,
                            "content": function_response})
            second_response = openai.ChatCompletion.create(
                model=self.model_name,
                messages=message,
            )
            message.append(dict(second_response.choices[0].message))
            return ChatCompletionWithHistory(state="success",
                                             role=second_response.choices[0].message["role"],
                                             content=second_response.choices[0].message["content"],
                                             prompt_token=response.get("usage", {}).get("prompt_tokens", 0) +
                                             second_response.get("usage", {}).get("prompt_tokens", 0),
                                             completion_token=response.get("usage", {}).get("completion_tokens", 0) +
                                             second_response.get("usage", {}).get("completion_tokens", 0),
                                             message_scratchpad=message,
                                             plugin_cost=plugin_cost,
                                             plugin_token=plugin_token,
                                             )
        else:
            # No function call: record the reply and return it directly.
            message.append(dict(response_message))
            return ChatCompletionWithHistory(state="success",
                                             role=response.choices[0].message["role"],
                                             content=response.choices[0].message["content"],
                                             prompt_token=response.get("usage", {}).get("prompt_tokens", 0),
                                             completion_token=response.get("usage", {}).get("completion_tokens", 0),
                                             message_scratchpad=message)

    except Exception as exception:
        print("Exception:", exception)
        return ChatCompletionWithHistory(state="error", content=str(exception))
|
| 249 |
+
|
| 250 |
+
def function_chat_stream_completion(self, message: List[dict],
                                    function_map: Dict[str, Callable],
                                    function_schema: List[Dict]) -> ChatCompletionWithHistory:
    """
    Streamed function-calling chat completion for the OpenAI GPT API.

    Yields ``(chunk_type, ChatCompletionWithHistory)`` tuples where
    ``chunk_type`` is "content" for plain text deltas or "function_call" for
    streamed function-call argument deltas (preceded by a synthetic chunk
    that opens a JSON object with the function name).

    Errors are printed and re-raised to the caller. The original had an
    unreachable ``print``/``return`` *after* ``raise``; that dead code is
    removed and the print now runs before propagating.

    :param message: The message (scratchpad) to use for completion.
    :param function_map: Mapping from function name to the callable to invoke.
    :param function_schema: The function schema to use for completion.
    :return: Generator of (chunk_type, ChatCompletionWithHistory) tuples.
    """
    assert len(function_schema) == len(function_map)
    try:
        response = openai.ChatCompletion.create(
            n=self.params.n,
            model=self.model_name,
            messages=message,
            functions=function_schema,
            temperature=self.params.temperature,
            max_tokens=self.params.max_tokens,
            top_p=self.params.top_p,
            frequency_penalty=self.params.frequency_penalty,
            presence_penalty=self.params.presence_penalty,
            stream=True
        )
        tmp = next(response)
        role = tmp.choices[0].delta["role"]
        # A function-call stream carries no "content" in its first delta.
        _type = "function_call" if tmp.choices[0].delta["content"] is None else "content"
        if _type == "function_call":
            name = tmp.choices[0].delta['function_call']['name']
            # Emit the opening of a JSON object so consumers can concatenate
            # the streamed argument chunks after it.
            yield _type, ChatCompletionWithHistory(state="success", role=role,
                                                   content="{" + f'"name":"{name}", "arguments":',
                                                   message_scratchpad=message)
        for resp in response:
            content = resp.choices[0].delta.get(_type, "")
            if isinstance(content, dict):
                content = content['arguments']
            yield _type, ChatCompletionWithHistory(state="success",
                                                   role=role,
                                                   content=content,
                                                   message_scratchpad=message)

    except Exception as exception:
        print("Exception:", exception)
        raise
|
src/infiagent/llm/client/opt.py
ADDED
|
@@ -0,0 +1,373 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import logging
|
| 3 |
+
import os
|
| 4 |
+
from abc import ABC
|
| 5 |
+
from typing import Callable, List
|
| 6 |
+
|
| 7 |
+
import openai
|
| 8 |
+
from tenacity import ( # for exponential backoff
|
| 9 |
+
before_sleep_log,
|
| 10 |
+
retry,
|
| 11 |
+
stop_after_attempt,
|
| 12 |
+
wait_random_exponential,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
from ..base_llm import BaseLLM
|
| 16 |
+
from ...schemas import *
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
+
MAX_PROMPT_LENGTH = 7000
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(10), reraise=True,
       before_sleep=before_sleep_log(logger, logging.WARNING))
def chatcompletion_with_backoff(**kwargs):
    """Call ``openai.ChatCompletion.create`` with exponential-backoff retries.

    Retries up to 10 attempts with random exponential backoff (1-10s),
    logging a warning before each sleep and re-raising the final error.
    """
    return openai.ChatCompletion.create(**kwargs)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(10), reraise=True,
       before_sleep=before_sleep_log(logger, logging.WARNING))
async def async_chatcompletion_with_backoff(**kwargs):
    """Async variant of :func:`chatcompletion_with_backoff`.

    Awaits ``openai.ChatCompletion.acreate`` under the same retry policy.
    """
    # The original wrapped the call in a nested `_internal_coroutine` and
    # awaited it, which adds nothing: awaiting acreate() directly is
    # equivalent (tenacity's retry decorator handles coroutine functions).
    return await openai.ChatCompletion.acreate(**kwargs)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class OptOpenAIClient(BaseLLM, ABC):
    """
    Wrapper class for OpenAI GPT API collections.

    Talks to an OpenAI-compatible server at ``http://localhost:8000/v1``
    (set in ``__init__``), e.g. a locally hosted OPT model.

    :param model_name: The name of the model to use.
    :type model_name: str
    :param params: The parameters for the model.
    :type params: OptParamModel
    """

    model_name: str
    params: OptParamModel = OptParamModel()

    def __init__(self, **data):
        super().__init__(**data)
        # Point the openai SDK at the local OpenAI-compatible endpoint;
        # the local server ignores the API key, hence the placeholder.
        openai.api_key = "EMPTY"
        openai.api_base = "http://localhost:8000/v1"

    @classmethod
    async def create(cls, config_data):
        """Async factory kept for interface parity with the other LLM clients."""
        return OptOpenAIClient(**config_data)

    def get_model_name(self) -> str:
        return self.model_name

    def get_model_param(self) -> OptParamModel:
        return self.params

    def completion(self, prompt: str, **kwargs) -> BaseCompletion:
        """
        Completion method for OpenAI GPT API.

        :param prompt: The prompt to use for completion.
        :type prompt: str
        :param kwargs: Additional keyword arguments.
        :type kwargs: dict
        :return: BaseCompletion object.
        :rtype: BaseCompletion

        """
        # Keep only the trailing MAX_PROMPT_LENGTH characters to bound prompt size.
        response = chatcompletion_with_backoff(
            model=self.model_name,
            messages=[
                {"role": "user", "content": prompt[-MAX_PROMPT_LENGTH:]}
            ],
            timeout=1000,
            **kwargs
        )

        return BaseCompletion(state="success",
                              content=response.choices[0].message["content"],
                              prompt_token=response.get("usage", {}).get("prompt_tokens", 0),
                              completion_token=response.get("usage", {}).get("completion_tokens", 0))

    async def async_completion(self, prompt: str, **kwargs) -> BaseCompletion:
        """
        Completion method for OpenAI GPT API.

        :param prompt: The prompt to use for completion.
        :type prompt: str
        :param kwargs: Additional keyword arguments.
        :type kwargs: dict
        :return: BaseCompletion object.
        :rtype: BaseCompletion

        """
        response = await async_chatcompletion_with_backoff(
            model=self.model_name,
            messages=[
                {"role": "user", "content": prompt[-MAX_PROMPT_LENGTH:]}
            ],
            timeout=1000,
            **kwargs
        )

        return BaseCompletion(state="success",
                              content=response.choices[0].message["content"],
                              prompt_token=response.get("usage", {}).get("prompt_tokens", 0),
                              completion_token=response.get("usage", {}).get("completion_tokens", 0))

    def chat_completion(self, message: List[dict]) -> ChatCompletion:
        """
        Chat completion method for OpenAI GPT API.

        :param message: The message to use for completion.
        :type message: List[dict]
        :return: ChatCompletion object.
        :rtype: ChatCompletion
        """
        try:
            response = openai.ChatCompletion.create(
                n=self.params.n,
                model=self.model_name,
                messages=message,
                temperature=self.params.temperature,
                max_tokens=self.params.max_tokens,
                top_p=self.params.top_p,
                frequency_penalty=self.params.frequency_penalty,
                presence_penalty=self.params.presence_penalty,
            )
            return ChatCompletion(
                state="success",
                role=response.choices[0].message["role"],
                content=response.choices[0].message["content"],
                prompt_token=response.get("usage", {}).get("prompt_tokens", 0),
                completion_token=response.get("usage", {}).get("completion_tokens", 0),
            )
        except Exception as exception:
            print("Exception:", exception)
            # NOTE(review): content is the exception *object* here, while
            # other error paths in this module use str(exception) — confirm
            # callers tolerate a non-string content.
            return ChatCompletion(state="error", content=exception)

    def stream_chat_completion(self, message: List[dict], **kwargs):
        """
        Stream output chat completion for OpenAI GPT API.

        :param message: The message (scratchpad) to use for completion. Usually contains json of role and content.
        :type message: List[dict]
        :param kwargs: Additional keyword arguments.
        :type kwargs: dict
        :return: ChatCompletion object.
        :rtype: ChatCompletion
        """
        try:
            response = openai.ChatCompletion.create(
                n=self.params.n,
                model=self.model_name,
                messages=message,
                temperature=self.params.temperature,
                max_tokens=self.params.max_tokens,
                top_p=self.params.top_p,
                frequency_penalty=self.params.frequency_penalty,
                presence_penalty=self.params.presence_penalty,
                stream=True,
                **kwargs
            )
            # First streamed chunk only carries the role.
            role = next(response).choices[0].delta["role"]
            messages = []
            ## TODO: Calculate prompt_token and for stream mode
            for resp in response:
                messages.append(resp.choices[0].delta.get("content", ""))
                yield ChatCompletion(
                    state="success",
                    role=role,
                    content=messages[-1],
                    prompt_token=0,
                    completion_token=0,
                )
        except Exception as exception:
            print("Exception:", exception)
            # NOTE(review): a `return value` inside a generator only surfaces
            # via StopIteration.value — the consumer never sees this error
            # object; consider yielding it instead.
            return ChatCompletion(state="error", content=exception)

    def function_chat_completion(
        self,
        message: List[dict],
        function_map: Dict[str, Callable],
        function_schema: List[Dict],
    ) -> ChatCompletionWithHistory:
        """
        Chat completion method for OpenAI GPT API.

        :param message: The message to use for completion.
        :type message: List[dict]
        :param function_map: The function map to use for completion.
        :type function_map: Dict[str, Callable]
        :param function_schema: The function schema to use for completion.
        :type function_schema: List[Dict]
        :return: ChatCompletionWithHistory object.
        :rtype: ChatCompletionWithHistory
        """
        assert len(function_schema) == len(function_map)
        try:
            response = openai.ChatCompletion.create(
                n=self.params.n,
                model=self.model_name,
                messages=message,
                functions=function_schema,
                temperature=self.params.temperature,
                max_tokens=self.params.max_tokens,
                top_p=self.params.top_p,
                frequency_penalty=self.params.frequency_penalty,
                presence_penalty=self.params.presence_penalty,
            )
            response_message = response.choices[0]["message"]

            if response_message.get("function_call"):
                # The model requested a tool call: execute it locally.
                function_name = response_message["function_call"]["name"]
                fuction_to_call = function_map[function_name]
                function_args = json.loads(
                    response_message["function_call"]["arguments"]
                )
                function_response = fuction_to_call(**function_args)

                # Postprocess function response
                if isinstance(function_response, str):
                    plugin_cost = 0
                    plugin_token = 0
                elif isinstance(function_response, AgentOutput):
                    plugin_cost = function_response.cost
                    plugin_token = function_response.token_usage
                    function_response = function_response.output
                else:
                    raise Exception(
                        "Invalid tool response type. Must be on of [AgentOutput, str]"
                    )

                # Record the call and its result in the shared scratchpad
                # (mutates the caller's list), then ask for a follow-up reply.
                message.append(dict(response_message))
                message.append(
                    {
                        "role": "function",
                        "name": function_name,
                        "content": function_response,
                    }
                )
                second_response = openai.ChatCompletion.create(
                    model=self.get_model_name(),
                    messages=message,
                )
                message.append(dict(second_response.choices[0].message))
                return ChatCompletionWithHistory(
                    state="success",
                    role=second_response.choices[0].message["role"],
                    content=second_response.choices[0].message["content"],
                    prompt_token=response.get("usage", {}).get("prompt_tokens", 0)
                    + second_response.get("usage", {}).get("prompt_tokens", 0),
                    completion_token=response.get("usage", {}).get(
                        "completion_tokens", 0
                    )
                    + second_response.get("usage", {}).get("completion_tokens", 0),
                    message_scratchpad=message,
                    plugin_cost=plugin_cost,
                    plugin_token=plugin_token,
                )
            else:
                message.append(dict(response_message))
                return ChatCompletionWithHistory(
                    state="success",
                    role=response.choices[0].message["role"],
                    content=response.choices[0].message["content"],
                    prompt_token=response.get("usage", {}).get("prompt_tokens", 0),
                    completion_token=response.get("usage", {}).get(
                        "completion_tokens", 0
                    ),
                    message_scratchpad=message,
                )

        except Exception as exception:
            print("Exception:", exception)
            return ChatCompletionWithHistory(state="error", content=str(exception))

    def function_chat_stream_completion(
        self,
        message: List[dict],
        function_map: Dict[str, Callable],
        function_schema: List[Dict],
    ) -> ChatCompletionWithHistory:
        """
        Streamed function-calling chat completion.

        Yields ``(chunk_type, ChatCompletionWithHistory)`` tuples; chunk_type
        is "content" for plain text deltas or "function_call" for streamed
        function-call argument deltas.
        """
        assert len(function_schema) == len(function_map)
        try:
            response = openai.ChatCompletion.create(
                n=self.params.n,
                model=self.get_model_name(),
                messages=message,
                functions=function_schema,
                temperature=self.params.temperature,
                max_tokens=self.params.max_tokens,
                top_p=self.params.top_p,
                frequency_penalty=self.params.frequency_penalty,
                presence_penalty=self.params.presence_penalty,
                stream=True,
            )
            tmp = next(response)
            role = tmp.choices[0].delta["role"]
            # A function-call stream carries no "content" in its first delta.
            _type = (
                "function_call"
                if tmp.choices[0].delta["content"] is None
                else "content"
            )
            if _type == "function_call":
                name = tmp.choices[0].delta["function_call"]["name"]
                # Emit the opening of a JSON object so consumers can append
                # the streamed argument chunks after it.
                yield _type, ChatCompletionWithHistory(
                    state="success",
                    role=role,
                    content="{" + f'"name":"{name}", "arguments":',
                    message_scratchpad=message,
                )
            for resp in response:
                content = resp.choices[0].delta.get(_type, "")
                if isinstance(content, dict):
                    content = content["arguments"]
                yield _type, ChatCompletionWithHistory(
                    state="success",
                    role=role,
                    content=content,
                    message_scratchpad=message,
                )

        except Exception as e:
            logger.error(f"Failed to get response {str(e)}", exc_info=True)
            raise e
|
src/infiagent/prompt/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .prompt_template import *
|
| 2 |
+
from .simple_react_prompt import SimpleReactPrompt
|
| 3 |
+
from .zero_shot_react_prompt import ZeroShotReactPrompt
|
src/infiagent/prompt/prompt_template.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Prompt schema definition."""
|
| 2 |
+
from abc import ABC, abstractmethod
|
| 3 |
+
from string import Formatter
|
| 4 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 5 |
+
|
| 6 |
+
from pydantic import BaseModel, Extra, root_validator
|
| 7 |
+
|
| 8 |
+
from ..exceptions.exceptions import InputErrorException
|
| 9 |
+
from ..schemas import AgentAction, AgentObservation, BaseAgentResponse
|
| 10 |
+
|
| 11 |
+
OBSERVATION_KEY = "Observation"
|
| 12 |
+
THOUGHT_KEY = "Thought"
|
| 13 |
+
FINAL_ANSWER_KEY = "FinalAnswer"
|
| 14 |
+
|
| 15 |
+
DEFAULT_OBSERVATION = "Observation:"
|
| 16 |
+
DEFAULT_THOUGHT = "Thought:"
|
| 17 |
+
DEFAULT_FINAL_ANSWER = "Final Answer:"
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class PromptTemplate(BaseModel, ABC):
    """
    Base class for agent prompt templates.

    Subclasses declare the template string, its input variables, and the
    keyword markers (Observation/Thought/Final Answer) used when building
    the agent scratchpad.
    """

    # NOTE(review): underscore-prefixed names are *private attributes* in
    # pydantic v1, not model fields — confirm subclass class-level
    # assignments actually reach instances as intended.
    _input_variables: List[str]
    _template: str
    _keywords: Dict[str, str]
    _name: str
    _validate_template: bool
    _skip_on_failure: bool

    class Config:
        extra = Extra.forbid

    @property
    def input_variables(self) -> List[str]:
        return self._input_variables

    @property
    def template(self) -> str:
        return self._template

    @property
    def keywords(self) -> Dict[str, str]:
        return self._keywords

    @property
    def name(self) -> str:
        return self._name

    def format(self, **kwargs):
        """Fill the template; raise InputErrorException if any declared variable is missing."""
        if not set(self._input_variables).issubset(kwargs.keys()):
            missing_keys = set(self._input_variables) - kwargs.keys()
            raise InputErrorException(f"Missing keys in prompt template: {', '.join(missing_keys)}")

        # Drop extra kwargs so str.format only sees declared variables.
        filtered_kwargs = {key: kwargs[key] for key in self._input_variables if key in kwargs}

        return self._template.format(**filtered_kwargs)

    def construct_scratchpad(self, intermediate_steps: List[BaseAgentResponse]) -> str:
        """Construct the scratchpad that lets the agent continue its thought process."""
        thoughts = ""

        for agent_response in intermediate_steps:
            if isinstance(agent_response, AgentAction):
                # for agent action, use thought
                thoughts += agent_response.raw_output
            elif isinstance(agent_response, AgentObservation):
                # for agent observation use observation
                thoughts += f"\n{self.keywords.get(OBSERVATION_KEY, DEFAULT_OBSERVATION)}\n" \
                            f"{agent_response.formatted_output}\n\n" \
                            f"{self.keywords.get(THOUGHT_KEY, DEFAULT_THOUGHT)}\n"

        return thoughts

    @classmethod
    @root_validator(skip_on_failure=True)
    def template_is_valid(cls, values: Dict) -> Dict:
        """Check that template and input variables are consistent."""
        # NOTE(review): this reads values["validate_template"] /
        # values["input_variables"], but the class only declares the
        # underscore-prefixed private attrs above — verify the validator
        # actually receives these keys (pydantic v1 excludes private attrs
        # from `values`).
        if values["validate_template"]:
            try:
                dummy_input = {var: "" for var in values["input_variables"]}
                Formatter().format(values["template"], **dummy_input)
            except KeyError as e:
                raise InputErrorException("Invalid prompt schema; check for mismatched or missing input parameters. ")\
                    from e
        return values
|
src/infiagent/prompt/simple_react_prompt.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ..prompt import FINAL_ANSWER_KEY, OBSERVATION_KEY, THOUGHT_KEY, PromptTemplate
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class SimpleReactPrompt(PromptTemplate):
    """Minimal ReAct prompt whose section markers are the special tokens
    [SEP]/[EOS]Observation:/[END] declared in ``_keywords``."""
    _input_variables = ["instruction", "agent_scratchpad"]
    _template = "{instruction} \n{agent_scratchpad}"
    _keywords = {
        OBSERVATION_KEY: "[EOS]Observation:",
        THOUGHT_KEY: "[SEP]",
        FINAL_ANSWER_KEY: "[END]"
    }
    _name = 'SimpleReactPrompt'
    _validate_template = True
    _skip_on_failure = True

    def __init__(self, **data):
        super().__init__(**data)
|
src/infiagent/prompt/zero_shot_react_prompt.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ..prompt import PromptTemplate, OBSERVATION_KEY, THOUGHT_KEY, FINAL_ANSWER_KEY, DEFAULT_OBSERVATION, \
|
| 2 |
+
DEFAULT_THOUGHT, DEFAULT_FINAL_ANSWER
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class ZeroShotReactPrompt(PromptTemplate):
    """Standard zero-shot ReAct prompt: Thought / Action / Action Input /
    Observation loop ending in a Final Answer, with the default keyword
    markers from ``prompt_template``."""
    _input_variables = ["instruction", "agent_scratchpad", "tool_names", "tool_description"]
    _template = (
        "Answer the following questions as best you can."
        "You have access to the following tools:\n"
        "{tool_description}.\n"
        "Use the following format:\n\n"
        "Question: the input question you must answer\n"
        "Thought: you should always think about what to do\n\n"
        "Action: the action to take, should be one of [{tool_names}]\n\n"
        "Action Input:\n```python\n[the input to the action]\n```\n"
        "Observation: the result of the action\n\n"
        "... (this Thought/Action/Action Input/Observation can repeat N times)\n"
        "Thought: I now know the final answer\n"
        "Final Answer: the final answer to the original input question\n"
        "If you have any files outputted write them to \"./\"\n"
        "Do not use things like plot.show() as it will not work instead write them out \"./\"\n"
        "Begin!\n\n"
        "Question: {instruction}\nThought:\n"
        "{agent_scratchpad}\n"
    )
    _keywords = {
        OBSERVATION_KEY: DEFAULT_OBSERVATION,
        THOUGHT_KEY: DEFAULT_THOUGHT,
        FINAL_ANSWER_KEY: DEFAULT_FINAL_ANSWER
    }
    _name = 'ZeroShotReactPrompt'
    _validate_template = True
    _skip_on_failure = True

    def __init__(self, **data):
        super().__init__(**data)
|
src/infiagent/schemas/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .base_models import *
|
| 2 |
+
from .complete_models import *
|
| 3 |
+
from .sandbox_models import *
|
| 4 |
+
from .agent_models import *
|
| 5 |
+
from .llm_models import *
|
src/infiagent/schemas/agent_models.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import abc
|
| 4 |
+
from dataclasses import dataclass, field
|
| 5 |
+
from enum import Enum
|
| 6 |
+
from typing import List, NamedTuple, Optional, Union
|
| 7 |
+
|
| 8 |
+
from pydantic import BaseModel
|
| 9 |
+
|
| 10 |
+
from ..schemas.sandbox_models import *
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@dataclass
class BaseAgentResponse:
    """Base Agent step result, contains formatted output string."""
    formatted_output: str  # parsed/cleaned text shown downstream
    raw_output: str  # unmodified LLM output this step was parsed from
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@dataclass
class AgentAction(BaseAgentResponse):
    """
    Agent's action to take.
    """
    tool: str  # name of the tool the agent decided to invoke
    tool_input: Union[str, dict]  # raw input passed to the tool (string or structured)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@dataclass
class AgentObservation(BaseAgentResponse):
    """
    Observation produced after a tool was executed (result fed back to the agent).
    """
    tool: str  # name of the tool that produced this observation
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
@dataclass
class AgentFinish(BaseAgentResponse):
    """Agent's return value when finishing execution."""
    # No extra fields: the final answer lives in formatted_output / raw_output.
    pass
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class AgentType(Enum):
    """
    Enumerated type for agent types.
    """
    openai = "openai"
    react = "react"
    rewoo = "rewoo"
    vanilla = "vanilla"
    openai_memory = "openai_memory"

    @staticmethod
    def get_agent_class(_type: AgentType):
        """
        Resolve the concrete agent class for the given agent type.

        :param _type: agent type
        :return: agent class
        :raises ValueError: for any type without an implementation (only react is supported)
        """
        if _type != AgentType.react:
            raise ValueError(f"Unknown agent type: {_type}")
        # Imported lazily so loading this module does not pull in the agent package.
        from ..agent.react import ReactAgent
        return ReactAgent
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class AgentOutput(BaseModel):
    """
    Pydantic model for agent output.
    """
    output: str  # final answer text
    cost: float  # accumulated monetary cost of the run — units unspecified here; TODO confirm
    token_usage: int  # total tokens consumed by the run
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
@dataclass
class AgentRequest:
    """Inbound request bundle for one agent turn."""
    sandbox_id: Optional[str] = None  # existing sandbox to reuse; None to create a new one
    messages: List[Message] = field(default_factory=list)  # conversation history
    input_files: List[MediaFile] = field(default_factory=list)  # files to make available in the sandbox
    sandbox_status: Optional[SandboxStatus] = None  # last known sandbox state, if any
    is_cn: bool = False  # presumably toggles Chinese-language behavior — TODO confirm with callers
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
@dataclass
class AgentResponse:
    """Outbound result of one agent turn."""
    output_text: str  # formatted text to show the user
    raw_output_text: str  # unprocessed model output
    output_files: List[MediaFile] = field(default_factory=list)  # files generated during the turn
    sandbox_id: Optional[str] = None  # sandbox used for this turn
    sandbox_status: Optional[SandboxStatus] = None  # sandbox state after the turn
    turn_level_prompt: Optional[List[str]] = None  # per-iteration prompts, for tracing/eval
    turn_level_response: Optional[List[str]] = None  # per-iteration raw responses, for tracing/eval
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
class RoleType(Enum):
    """Conversation participant roles."""
    User = 0
    System = 1
    Agent = 2

    @classmethod
    def _missing_(cls, value):
        """Allow lookup by case-insensitive member name (e.g. RoleType("user"))."""
        if isinstance(value, str):
            wanted = value.lower()
            for candidate in cls:
                if candidate.name.lower() == wanted:
                    return candidate
        return super()._missing_(value)
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
@dataclass
class Message(abc.ABC):
    """A single chat message with its role and (optionally raw) content."""
    role: RoleType
    content: str
    raw_content: str = ""

    @staticmethod
    def parse_from_dict(data):
        """Build a Message from a plain dict, coercing the role value."""
        data['role'] = RoleType(data['role'])
        # Legacy payloads may predate the raw_content field.
        data.setdefault('raw_content', "")
        return Message(**data)

    def to_dict(self):
        """Serialize to a plain dict with the role as its primitive value."""
        if isinstance(self.role, RoleType):
            role_value = self.role.value
        else:
            role_value = self.role
        return {
            "role": role_value,
            "content": self.content,
            "raw_content": self.raw_content
        }
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
@dataclass
class MediaFile:
    """A file attached to a request/response, possibly present in several storage locations."""
    file_name: Optional[str] = None
    file_content: Optional[bytes] = None
    tos_path: Optional[str] = None
    sandbox_path: Optional[str] = None

    # NOTE: shadows the instance __dict__ attribute with a callable; existing callers
    # invoke media_file.__dict__() to get a mapping with "" substituted for None.
    def __dict__(self):
        serialized = {}
        for key in ('file_name', 'file_content', 'tos_path', 'sandbox_path'):
            value = getattr(self, key)
            serialized[key] = "" if value is None else value
        return serialized
|
src/infiagent/schemas/base_models.py
ADDED
|
File without changes
|
src/infiagent/schemas/complete_models.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# coding: utf-8
|
| 2 |
+
from datetime import datetime
|
| 3 |
+
from time import time
|
| 4 |
+
from typing import Any, Dict, List, Optional, Union
|
| 5 |
+
|
| 6 |
+
from pydantic import BaseModel
|
| 7 |
+
|
| 8 |
+
from ..schemas.agent_models import Message
|
| 9 |
+
from ..utils.file_utils import get_file_name_and_path
|
| 10 |
+
|
| 11 |
+
# Definitions for inputs and outputs schema for /complete api
|
| 12 |
+
|
| 13 |
+
# Sampling defaults applied when the request leaves them unset.
DEFAULT_TOP_P = 0.7
DEFAULT_TEMPERATURE = 1.0
DEFAULT_STREAM = False

# Lifecycle states reported in Delta.status.
FINISH_STATUS = "FINISH"
FAILED_STATUS = "FAILED"
PROCESSING_STATUS = "PROCESSING"
# Role string expected by the front end.
ASSISTANT = "assistant"
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# Main Input Model
class ChatCompleteRequest(BaseModel):
    """Inbound payload for the /complete API."""
    chat_id: str  # unique chat id for given chat
    code_interpreter: Optional[dict] = {}  # may carry "tos_key" pointing at an uploaded file
    messages: List[dict] = []  # chat message
    model: str = "AZURE_OPEN_AI"  # model name map to LLM conf
    user: str  # caller identity; copied into Delta.creator/updater
    max_tokens: Optional[int] = None
    message_conf: Optional[dict] = {}
    n: Optional[int] = None
    plugins: Optional[List[str]] = None
    seed_conf: Optional[dict] = {}
    stream: Optional[bool] = None  # None -> DEFAULT_STREAM
    temperature: Optional[float] = None  # None -> DEFAULT_TEMPERATURE
    top_p: Optional[float] = None  # None -> DEFAULT_TOP_P
    top_k: Optional[int] = None
    webgpt: Optional[Dict[str, Any]] = None
    webgpt_network: Optional[bool] = None
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class MessageConf(BaseModel):
    """Per-message generation configuration echoed back in each Delta."""
    top_p: float = DEFAULT_TOP_P
    temperature: float = DEFAULT_TEMPERATURE
    top_k: Optional[int] = None
    time_cost: int  # elapsed time for the message; always 0 where constructed in this module
    code_interpreter: dict  # e.g. {"tos_key": <file name>} when an input file is attached
    gpt_engine_conf: dict
    stream: bool
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
class Delta(BaseModel):
    """One message delta in a chat completion choice."""
    role: str  # e.g. ASSISTANT
    content: str
    sid: str
    status: str  # one of FINISH_STATUS / FAILED_STATUS / PROCESSING_STATUS
    end_turn: bool
    parent_id: str
    children_ids: Optional[Union[List[str], None]]
    err_msg: str
    creator: str
    updater: str
    ctime: str  # creation timestamp string
    utime: str  # last-update timestamp string
    message_conf: MessageConf

    def json(self, *args, **kwargs):
        """Serialize like BaseModel.json, but render UTC offsets as 'Z'."""
        serialized_data = super().json(*args, **kwargs)
        return serialized_data.replace("+00:00", "Z")
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
class Choice(BaseModel):
    """A single completion choice wrapping one Delta."""
    index: int  # position within ChatCompleteResponse.choices
    delta: Delta
    finish_reason: str  # "stop" for completed turns in this module
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
class ChatCompleteResponse(BaseModel):
    """Top-level response payload for the /complete API."""
    id: str  # mirrors the request's chat_id
    created: int  # Unix timestamp of response creation
    choices: List[Choice]
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def chat_request_to_message_conf(chat_request: ChatCompleteRequest) -> MessageConf:
    """Derive a MessageConf from the request, substituting defaults for unset sampling knobs."""
    input_files = {}

    code_interpreter = chat_request.code_interpreter
    if code_interpreter and "tos_key" in code_interpreter:
        # Only the bare file name is kept; the tos path component is not needed here.
        file_name, _tos_path = get_file_name_and_path(code_interpreter["tos_key"])
        input_files = {"tos_key": file_name}

    top_p = DEFAULT_TOP_P if chat_request.top_p is None else chat_request.top_p
    temperature = DEFAULT_TEMPERATURE if chat_request.temperature is None else chat_request.temperature
    stream = DEFAULT_STREAM if chat_request.stream is None else chat_request.stream

    return MessageConf(
        top_p=top_p,
        temperature=temperature,
        code_interpreter=input_files,
        time_cost=0,
        gpt_engine_conf={},
        stream=stream
    )
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def chat_request_to_deltas(chat_request: ChatCompleteRequest) -> List[Delta]:
    """Convert each message in the request into a finished assistant Delta.

    All deltas share one MessageConf derived from the request; creator/updater
    are taken from the request's user field.
    """
    deltas = []
    message_conf = chat_request_to_message_conf(chat_request)

    for message in chat_request.messages:
        delta = Delta(
            role=ASSISTANT,
            content=message["content"],
            sid="",
            # Use the module constant instead of a hard-coded literal, consistent
            # with update_chat_response_with_message (same value: "FINISH").
            status=FINISH_STATUS,
            end_turn=False,
            parent_id="",
            children_ids=None,
            err_msg="",
            creator=chat_request.user,
            updater=chat_request.user,
            ctime=current_utc_time_as_str(),
            utime=current_utc_time_as_str(),
            message_conf=message_conf
        )
        deltas.append(delta)

    return deltas
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def chat_request_to_choices(chat_request: ChatCompleteRequest) -> List[Choice]:
    """Wrap each Delta derived from the request in a Choice with finish_reason 'stop'."""
    return [
        Choice(index=position, delta=one_delta, finish_reason="stop")
        for position, one_delta in enumerate(chat_request_to_deltas(chat_request))
    ]
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def chat_request_to_response(chat_request: ChatCompleteRequest) -> ChatCompleteResponse:
    """Build an initial ChatCompleteResponse that echoes the request's messages as choices."""
    return ChatCompleteResponse(
        id=chat_request.chat_id,
        created=int(time()),
        choices=chat_request_to_choices(chat_request)
    )
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def update_chat_response_with_message(chat_response: ChatCompleteResponse,
                                      message: Message,
                                      status: Union[str, None] = None) -> ChatCompleteResponse:
    """Replace the response's choices with a single new Choice carrying *message*.

    Metadata (sid, parent_id, timestamps, message_conf, ...) is inherited from the
    last existing Delta when present, otherwise falls back to empty/default values.

    :param chat_response: response to update in place (also returned)
    :param message: message whose content becomes the new delta's content
    :param status: delta status; defaults to FINISH_STATUS when None
    :return: the same chat_response, now containing exactly one Choice
    """
    # Get the last Delta (if exists)
    last_delta = chat_response.choices[-1].delta if chat_response.choices else None
    updated_delta = Delta(
        role=ASSISTANT,  # map with front end
        content=message.content,
        sid=last_delta.sid if last_delta else "",
        status=status if status is not None else FINISH_STATUS,
        end_turn=False,
        parent_id=last_delta.parent_id if last_delta else "",
        children_ids=last_delta.children_ids if last_delta else None,
        err_msg="",
        creator=last_delta.creator if last_delta else None,
        updater=last_delta.updater if last_delta else None,
        ctime=last_delta.ctime if last_delta else current_utc_time_as_str(),
        utime=current_utc_time_as_str(),
        # NOTE(review): these truthiness checks treat 0/0.0/{} as "unset" and fall back
        # to defaults; e.g. an explicit temperature of 0.0 is replaced — confirm intended.
        message_conf=MessageConf(
            top_p=last_delta.message_conf.top_p if last_delta and last_delta.message_conf.top_p else DEFAULT_TOP_P,
            temperature=last_delta.message_conf.temperature if last_delta and last_delta.message_conf.temperature else
            DEFAULT_TEMPERATURE,
            code_interpreter=last_delta.message_conf.code_interpreter
            if last_delta and last_delta.message_conf.code_interpreter else {},
            time_cost=0,
            gpt_engine_conf={},
            stream=last_delta.message_conf.stream if last_delta and last_delta.message_conf.stream is not None else
            False
        )
    )

    updated_choice = Choice(
        index=0,  # Since it's the only choice in the list
        delta=updated_delta,
        finish_reason="stop"
    )

    # Update the ChatCompleteResponse to contain only the new Choice
    chat_response.choices = [updated_choice]
    return chat_response
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
def current_utc_time_as_str() -> str:
    """Return the current UTC time formatted as 'YYYY-MM-DDTHH:MM:SSZ'."""
    # datetime.utcnow() is naive and deprecated since Python 3.12; use an
    # aware UTC datetime instead — the formatted output is identical.
    from datetime import timezone
    return datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def create_empty_response():
    """Return a placeholder ChatCompleteResponse with one empty choice.

    Used as a neutral starting value before real content/metadata is filled in.
    """
    # Dummy instance for Delta
    delta = Delta(
        role=ASSISTANT,
        content="",
        sid="",
        status="",
        end_turn=False,
        parent_id="",
        children_ids=None,
        err_msg="",
        creator="",
        updater="",
        ctime="",
        utime="",
        message_conf=MessageConf(
            top_p=0.0,
            temperature=0,
            time_cost=0,
            code_interpreter={},
            gpt_engine_conf={},
            stream=False
        )
    )

    # Dummy instance for Choice
    choice = Choice(
        index=0,
        delta=delta,
        finish_reason=""
    )

    # Dummy instance for ChatCompleteResponse
    response = ChatCompleteResponse(
        id="",
        created=0,
        choices=[choice]
    )
    return response
|
| 236 |
+
|
src/infiagent/schemas/llm_models.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from enum import Enum
|
| 5 |
+
from typing import Dict, List, NamedTuple, Union
|
| 6 |
+
|
| 7 |
+
from pydantic import BaseModel
|
| 8 |
+
|
| 9 |
+
try:
|
| 10 |
+
import torch
|
| 11 |
+
except ImportError:
|
| 12 |
+
pass
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class BaseCompletion(BaseModel):
    """Common fields shared by all LLM completion results."""
    state: str  # "success" or "error"
    content: str
    prompt_token: int = 0
    completion_token: int = 0

    def to_dict(self):
        """Serialize to a plain dict."""
        return {
            "state": self.state,
            "content": self.content,
            "prompt_token": self.prompt_token,
            "completion_token": self.completion_token,
        }
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class ChatCompletion(BaseCompletion):
    """A chat-style completion that additionally carries the speaker role."""
    role: str = "assistant"  # "system" or "user" or "assistant"
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class ChatCompletionWithHistory(ChatCompletion):
    """Used for function call API"""
    message_scratchpad: List[Dict] = []  # accumulated function-call message history
    plugin_cost: float = 0.0
    plugin_token: float = 0.0
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class BaseParamModel(BaseModel):
    """Parameter model compared by field values rather than identity."""
    # NOTE(review): defining __eq__ without __hash__ makes instances unhashable
    # under default Python rules — confirm no caller uses these as dict keys.
    def __eq__(self, other):
        return self.dict() == other.dict()
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
class OpenAIParamModel(BaseModel):
    """
    OpenAI API parameters
    """
    max_tokens: int = 2048
    temperature: float = 0.2
    top_p: float = 1.0
    presence_penalty: float = 0.0
    frequency_penalty: float = 0.0
    n: int = 1  # number of completions to request
    stop: list = []  # stop sequences (pydantic copies this default per instance)
|
| 58 |
+
|
| 59 |
+
class AzureOpenAIParamModel(BaseModel):
    """
    AzureOpenAI API parameters
    """
    max_tokens: int = 2048
    temperature: float = 0.2
    top_p: float = 1.0
    presence_penalty: float = 0.0
    frequency_penalty: float = 0.0
    n: int = 1  # number of completions to request
    stop: list = []  # stop sequences
|
| 70 |
+
|
| 71 |
+
class LlamaParamModel(BaseModel):
    """
    Llama API parameters
    """
    max_tokens: int = 4096  # larger default context than the OpenAI models above
    temperature: float = 0.2
    top_p: float = 1.0
    presence_penalty: float = 0.0
    frequency_penalty: float = 0.0
    n: int = 1
    stop: list = []
|
| 82 |
+
|
| 83 |
+
class OptParamModel(BaseModel):
    """
    OPT API parameters
    """
    max_tokens: int = 2048
    temperature: float = 0.2
    top_p: float = 1.0
    n: int = 1
    stop: list = []
|
src/infiagent/schemas/sandbox_models.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from enum import Enum
|
| 2 |
+
from typing import Any, List, Optional
|
| 3 |
+
from pydantic import BaseModel
|
| 4 |
+
|
| 5 |
+
class SandboxStatus(Enum):
    """
    Enumerated type for sandbox execution statuses.
    """
    success = "success"
    failed = "failed"
    timeout = "timeout"
|
| 12 |
+
|
| 13 |
+
class CodeOutput(BaseModel):
    """One piece of output produced by code executed in the sandbox."""
    type: str  # output kind — exact values defined by the sandbox server; TODO confirm
    content: str
|
| 16 |
+
|
| 17 |
+
class ReturnedFile(BaseModel):
    """A file reported back by the sandbox (created or deleted during a run)."""
    download_link: str
    name: str
    path: str  # path inside the sandbox
|
| 21 |
+
|
| 22 |
+
class CodeRunResult(BaseModel):
    """Aggregate result of one sandbox code run."""
    code_output_result: List[CodeOutput]
    deleted_files: List[ReturnedFile]
    new_generated_files: List[ReturnedFile]
|
| 26 |
+
|
| 27 |
+
class CodeRunData(BaseModel):
    """Data envelope for a code run; is_partial marks streamed/incomplete results."""
    is_partial: bool
    result: CodeRunResult
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class RunCodeOutput(BaseModel):
    """Sandbox server response for a run-code call."""
    code: int  # server status code (not an HTTP status)
    message: str
    data: Optional[CodeRunData]
|
| 36 |
+
|
| 37 |
+
class CreateSessionOutput(BaseModel):
    """Sandbox server response for a create-session call."""
    code: int
    message: str
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class ErrorResponse(BaseModel):
    """Generic error envelope returned by the sandbox server."""
    code: int
    message: str
    data: Optional[Any]
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class UploadOutput(BaseModel):
    """Sandbox server response for a file upload."""
    code: int
    message: Optional[str]
    data: Optional[str]
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
# Model for successful response (assuming it's a text file for this example)
|
| 55 |
+
# Model for successful response (assuming it's a text file for this example)
class DownloadSuccessOutput(BaseModel):
    file_name: str  # this is not part of server response. We must fill this field in client.
    content: str
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class HeartbeatOutput(BaseModel):
    """Sandbox server response for a heartbeat/keep-alive call."""
    code: Optional[int]
    message: Optional[str]
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
class RefreshSandboxOutput(BaseModel):
    """Sandbox server response for a refresh/reset call."""
    code: Optional[int]
    message: Optional[str]
|
| 68 |
+
|
| 69 |
+
|
src/infiagent/services/__init__.py
ADDED
|
File without changes
|
src/infiagent/services/chat_complete_service.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
from io import BytesIO
|
| 3 |
+
from typing import Any, Dict, List, Union
|
| 4 |
+
|
| 5 |
+
from fastapi import UploadFile
|
| 6 |
+
from starlette.datastructures import UploadFile as StarletteUploadFile
|
| 7 |
+
from werkzeug.datastructures import FileStorage
|
| 8 |
+
|
| 9 |
+
from ..conversation_sessions import CodeInterpreterSession
|
| 10 |
+
from ..exceptions.exceptions import (
|
| 11 |
+
DependencyException,
|
| 12 |
+
InputErrorException,
|
| 13 |
+
InternalErrorException,
|
| 14 |
+
ModelMaxIterationsException,
|
| 15 |
+
)
|
| 16 |
+
from ..schemas import Message, RoleType
|
| 17 |
+
from ..utils import get_logger
|
| 18 |
+
from ..tools import AsyncPythonSandBoxTool
|
| 19 |
+
|
| 20 |
+
logger = get_logger()
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
async def predict(
        prompt: str,
        model_name: str,
        config_path: str,
        uploaded_files: Any,
        **kwargs: Dict[str, Any]):
    """Run one full code-interpreter turn: create a session, upload files, chat, clean up.

    :param prompt: user instruction for this turn
    :param model_name: LLM config key used to build the session
    :param config_path: path to the agent/tool configuration
    :param uploaded_files: a path string, or a list of path strings / UploadFile objects
    :param kwargs: forwarded verbatim to CodeInterpreterSession.create
    :return: concatenated agent output text (None if the session yields nothing)
    :raises Exception: wraps any failure with a user-facing message
    """
    # NOTE(review): upload_files is referenced here but this section's imports only bring
    # in get_logger; the name resolves only because a later import block in this file also
    # imports upload_files — verify import ordering.
    # NOTE(review): a second `predict` definition later in this file shadows this one at
    # import time (likely a merge artifact) — confirm which version is intended.
    start_time = time.time()

    # create new session
    session = await CodeInterpreterSession.create(
        model_name=model_name,
        config_path=config_path,
        **kwargs
    )

    files = upload_files(uploaded_files, session.session_id)
    logger.info(f"Session Creation Latency: {time.time() - start_time}")

    # upload file
    if isinstance(files, str):
        logger.info(f"Upload {files} as file path")
        await session.upload_to_sandbox(files)
    # upload list of file
    elif isinstance(files, list):
        for file in files:
            if isinstance(file, str):
                await session.upload_to_sandbox(file)
            elif isinstance(file, UploadFile) or isinstance(file, StarletteUploadFile):
                file_content = file.file.read()  # get file content
                file_like_object = BytesIO(file_content)
                file_storage = FileStorage(
                    stream=file_like_object,
                    filename=file.filename,
                    content_type=file.content_type
                )
                await session.upload_to_sandbox(file_storage)
            else:
                raise InputErrorException("The file type {} not supported, can't be uploaded".format(type(file)))

    # chat
    try:
        logger.info(f"Instruction message: {prompt}")
        content = None
        output_files = []
        user_messages = [Message(RoleType.User, prompt)]
        async for response in session.chat(user_messages):
            logger.info(f'Session Chat Response: {response}')
            # Concatenate streamed chunks into a single answer string.
            if content is None:
                content = response.output_text
            else:
                content += response.output_text

            output_files.extend([output_file.__dict__() for output_file in response.output_files])

        session.messages.append(Message(RoleType.Agent, content))
        # Release the sandbox kernel once the turn is complete.
        AsyncPythonSandBoxTool.kill_kernels(session.session_id)
        logger.info(f"Release python sandbox {session.session_id}")
        logger.info(f"Total Latency: {time.time() - start_time}")

        return content

    except (ModelMaxIterationsException, DependencyException, InputErrorException, InternalErrorException, Exception) \
            as e:
        # Map known failure categories to user-facing messages; anything else is "unknown".
        exception_messages = {
            ModelMaxIterationsException: "Sorry. The agent didn't find the correct answer after multiple trials, "
                                         "Please try another question.",
            DependencyException: "Agent failed to process message due to dependency issue. You can try it later. "
                                 "If it still happens, please contact oncall.",
            InputErrorException: "Agent failed to process message due to value issue. If you believe all input are "
                                 "correct, please contact oncall.",
            InternalErrorException: "Agent failed to process message due to internal error, please contact oncall.",
            Exception: "Agent failed to process message due to unknown error, please contact oncall."
        }
        err_msg = exception_messages.get(type(e), f"Unknown error occurred: {str(e)}")
        logger.error(err_msg, exc_info=True)

        raise Exception(err_msg)
|
| 100 |
+
|
| 101 |
+
import time
|
| 102 |
+
from typing import Union, List, Any, Dict
|
| 103 |
+
from io import BytesIO
|
| 104 |
+
|
| 105 |
+
from fastapi import UploadFile
|
| 106 |
+
from starlette.datastructures import UploadFile as StarletteUploadFile
|
| 107 |
+
|
| 108 |
+
from ..conversation_sessions import CodeInterpreterSession
|
| 109 |
+
from ..schemas import (
|
| 110 |
+
Message,
|
| 111 |
+
RoleType
|
| 112 |
+
)
|
| 113 |
+
from werkzeug.datastructures import FileStorage
|
| 114 |
+
|
| 115 |
+
from ..exceptions.exceptions import InputErrorException, DependencyException, InternalErrorException, \
|
| 116 |
+
ModelMaxIterationsException
|
| 117 |
+
|
| 118 |
+
from ..utils import get_logger, upload_files
|
| 119 |
+
|
| 120 |
+
logger = get_logger()
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
async def predict(
        prompt: str,
        model_name: str,
        uploaded_files: Any,
        **kwargs: Dict[str, Any]):
    """Run one code-interpreter turn: create a session, upload files, chat, return text.

    :param prompt: user instruction for this turn
    :param model_name: LLM config key used to build the session
    :param uploaded_files: a path string, or a list of path strings / UploadFile objects
    :param kwargs: forwarded verbatim to CodeInterpreterSession.create
    :return: concatenated agent output text (None if the session yields nothing)
    :raises Exception: wraps any failure with a user-facing message
    """
    # NOTE(review): this is the second `predict` defined in this file and shadows the
    # earlier one (which also took config_path and killed the sandbox kernel on success)
    # — likely a merge artifact; confirm which version should survive.
    start_time = time.time()

    # create new session
    session = await CodeInterpreterSession.create(
        model_name=model_name,
        **kwargs
    )

    files = upload_files(uploaded_files, session.session_id)
    logger.info(f"Session Creation Latency: {time.time() - start_time}")

    # upload file
    if isinstance(files, str):
        logger.info(f"Upload {files} as file path")
        await session.upload_to_sandbox(files)
    # upload list of file
    elif isinstance(files, list):
        for file in files:
            if isinstance(file, str):
                await session.upload_to_sandbox(file)
            elif isinstance(file, UploadFile) or isinstance(file, StarletteUploadFile):
                file_content = file.file.read()  # get file content
                file_like_object = BytesIO(file_content)
                file_storage = FileStorage(
                    stream=file_like_object,
                    filename=file.filename,
                    content_type=file.content_type
                )
                await session.upload_to_sandbox(file_storage)
            else:
                raise InputErrorException("The file type {} not supported, can't be uploaded".format(type(file)))

    # chat
    try:
        logger.info(f"Instruction message: {prompt}")
        content = None
        output_files = []
        user_messages = [Message(RoleType.User, prompt)]

        async for response in session.chat(user_messages):
            logger.info(f'Session Chat Response: {response}')
            # Concatenate streamed chunks into a single answer string.
            if content is None:
                content = response.output_text
            else:
                content += response.output_text

            output_files.extend([output_file.__dict__() for output_file in response.output_files])

        session.messages.append(Message(RoleType.Agent, content))

        logger.info(f"Total Latency: {time.time() - start_time}")

        return content
    except (ModelMaxIterationsException, DependencyException, InputErrorException, InternalErrorException, Exception) \
            as e:
        # Map known failure categories to user-facing messages; anything else is "unknown".
        exception_messages = {
            ModelMaxIterationsException: "Sorry. The agent didn't find the correct answer after multiple trials, "
                                         "Please try another question.",
            DependencyException: "Agent failed to process message due to dependency issue. You can try it later. "
                                 "If it still happens, please contact oncall.",
            InputErrorException: "Agent failed to process message due to value issue. If you believe all input are "
                                 "correct, please contact oncall.",
            InternalErrorException: "Agent failed to process message due to internal error, please contact oncall.",
            Exception: "Agent failed to process message due to unknown error, please contact oncall."
        }
        err_msg = exception_messages.get(type(e), f"Unknown error occurred: {str(e)}")
        logger.error(err_msg, exc_info=True)

        raise Exception(err_msg)
|