Spaces:
Sleeping
Sleeping
Upload 10 files
#1
by
adhamyounes
- opened
- app.py +704 -44
- deploy-docs.yml +59 -0
- dummy-agent-test.yml +33 -0
- ghcr.yml +82 -0
- lint.yml +57 -0
- review-pr.yml +69 -0
- run-integration-tests.yml +104 -0
- run-unit-tests.yml +129 -0
- solve-issue.yml +109 -0
- stale.yml +29 -0
app.py
CHANGED
|
@@ -1,46 +1,706 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
else:
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
| 2 |
+
# SPDX-License-Identifier: MIT
|
| 3 |
+
#
|
| 4 |
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
| 5 |
+
# copy of this software and associated documentation files (the "Software"),
|
| 6 |
+
# to deal in the Software without restriction, including without limitation
|
| 7 |
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
| 8 |
+
# and/or sell copies of the Software, and to permit persons to whom the
|
| 9 |
+
# Software is furnished to do so, subject to the following conditions:
|
| 10 |
+
#
|
| 11 |
+
# The above copyright notice and this permission notice shall be included in
|
| 12 |
+
# all copies or substantial portions of the Software.
|
| 13 |
+
#
|
| 14 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 15 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 16 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
| 17 |
+
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 18 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
| 19 |
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
| 20 |
+
# DEALINGS IN THE SOFTWARE.
|
| 21 |
+
import os
|
| 22 |
+
import sys
|
| 23 |
+
import time
|
| 24 |
+
import calendar
|
| 25 |
+
import json
|
| 26 |
+
from model_setup_manager import download_model_by_name, build_engine_by_name
|
| 27 |
+
import logging
|
| 28 |
+
import gc
|
| 29 |
+
import torch
|
| 30 |
+
from pathlib import Path
|
| 31 |
+
from trt_llama_api import TrtLlmAPI
|
| 32 |
+
from whisper.trt_whisper import WhisperTRTLLM, decode_audio_file
|
| 33 |
+
#from langchain.embeddings.huggingface import HuggingFaceEmbeddings
|
| 34 |
+
#from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
| 35 |
+
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
|
| 36 |
+
from collections import defaultdict
|
| 37 |
+
from llama_index import ServiceContext
|
| 38 |
+
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
|
| 39 |
+
from llama_index import set_global_service_context
|
| 40 |
+
from faiss_vector_storage import FaissEmbeddingStorage
|
| 41 |
+
from ui.user_interface import MainInterface
|
| 42 |
+
from scipy.io import wavfile
|
| 43 |
+
import scipy.signal as sps
|
| 44 |
+
import numpy as np
|
| 45 |
+
from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo
|
| 46 |
+
from CLIP import run_model_on_images, CLIPEmbeddingStorageEngine
|
| 47 |
+
from PIL import Image
|
| 48 |
+
from transformers import CLIPProcessor, CLIPModel
|
| 49 |
+
import shutil
|
| 50 |
+
from llm_prompt_templates import LLMPromptTemplate
|
| 51 |
+
from utils import (read_model_name)
|
| 52 |
+
import win32api
|
| 53 |
+
import win32security
|
| 54 |
|
| 55 |
+
selected_CLIP = False
|
| 56 |
+
clip_engine = None
|
| 57 |
+
selected_ChatGLM = False
|
| 58 |
+
app_config_file = 'config\\app_config.json'
|
| 59 |
+
model_config_file = 'config\\config.json'
|
| 60 |
+
preference_config_file = 'config\\preferences.json'
|
| 61 |
+
data_source = 'directory'
|
| 62 |
+
|
| 63 |
+
# Use GetCurrentProcess to get a handle to the current process
hproc = win32api.GetCurrentProcess()
# Use OpenProcessToken to get the access token of the current process
htok = win32security.OpenProcessToken(hproc, win32security.TOKEN_QUERY)

# Retrieve the list of (privilege LUID, attribute flags) pairs on the token
privileges = win32security.GetTokenInformation(htok, win32security.TokenPrivileges)

# Collect the names of the privileges that are currently enabled
priv_list = []
for priv_id, priv_flags in privileges:
    # BUG FIX: the original test `priv_flags == SE_PRIVILEGE_ENABLED or
    # SE_PRIVILEGE_ENABLED_BY_DEFAULT` was always true, because the second
    # operand is a non-zero constant evaluated on its own. Privilege
    # attributes are bit flags, so test membership with a bitwise AND.
    if priv_flags & (win32security.SE_PRIVILEGE_ENABLED | win32security.SE_PRIVILEGE_ENABLED_BY_DEFAULT):
        # Look up the human-readable name of the privilege
        priv_name = win32security.LookupPrivilegeName(None, priv_id)
        priv_list.append(priv_name)

print(f"Privileges of app process: {priv_list}")
|
| 81 |
+
|
| 82 |
+
def read_config(file_name):
    """Load a JSON configuration file.

    Returns the parsed object on success, or None when the file is missing,
    contains invalid JSON, or any other error occurs (the error is printed).
    """
    result = None
    try:
        with open(file_name, 'r', encoding='utf8') as fh:
            result = json.load(fh)
    except FileNotFoundError:
        print(f"The file {file_name} was not found.")
    except json.JSONDecodeError:
        print(f"There was an error decoding the JSON from the file {file_name}.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    return result
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def get_model_config(config, model_name=None):
    """Resolve the runtime configuration for the requested LLM.

    Falls back to the first supported model when *model_name* does not match
    any entry. Returns a dict of engine/tokenizer paths plus generation
    settings for NGC-packaged models, a smaller path dict for HF-packaged
    ones, and None (implicitly) for anything else.
    """
    supported = config["models"]["supported"]
    selected_model = supported[0]
    for candidate in supported:
        if candidate["name"] == model_name:
            selected_model = candidate
            break

    metadata = selected_model["metadata"]
    cwd = os.getcwd()  # cached so we only query the CWD once

    if "ngc_model_name" in selected_model:
        prereq = selected_model["prerequisite"]
        return {
            "model_path": os.path.join(cwd, "model", selected_model["id"], "engine") if "id" in selected_model else None,
            "engine": metadata.get("engine", None),
            "tokenizer_path": os.path.join(cwd, "model", selected_model["id"], prereq["tokenizer_local_dir"]) if "tokenizer_local_dir" in prereq else None,
            "vocab": os.path.join(cwd, "model", selected_model["id"], prereq["vocab_local_dir"], prereq["tokenizer_files"]["vocab_file"]) if "vocab_local_dir" in prereq else None,
            "max_new_tokens": metadata.get("max_new_tokens", None),
            "max_input_token": metadata.get("max_input_token", None),
            "temperature": metadata.get("temperature", None),
            "prompt_template": metadata.get("prompt_template", None),
        }

    if "hf_model_name" in selected_model:
        local_dir = os.path.join(cwd, "model", selected_model["id"]) if "id" in selected_model else None
        return {
            "model_path": local_dir,
            "tokenizer_path": local_dir,
            "prompt_template": metadata.get("prompt_template", None),
        }
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def get_asr_model_config(config, model_name=None):
    """Return engine/assets paths for the requested ASR (Whisper) model.

    Falls back to the first supported ASR entry when *model_name* is absent
    or does not match.
    """
    models = config["models"]["supported_asr"]
    chosen = models[0]
    for entry in models:
        if entry["name"] == model_name:
            chosen = entry
            break
    meta = chosen["metadata"]
    cwd = os.getcwd()
    return {
        "model_path": os.path.join(cwd, meta["model_path"]),
        "assets_path": os.path.join(cwd, meta["assets_path"]),
    }
|
| 128 |
+
|
| 129 |
+
def get_data_path(config):
    """Return the configured dataset directory as a path anchored at the CWD."""
    dataset_path = config["dataset"]["path"]
    return os.path.join(os.getcwd(), dataset_path)
|
| 131 |
+
|
| 132 |
+
# read the app specific config
|
| 133 |
+
app_config = read_config(app_config_file)
|
| 134 |
+
streaming = app_config["streaming"]
|
| 135 |
+
similarity_top_k = app_config["similarity_top_k"]
|
| 136 |
+
is_chat_engine = app_config["is_chat_engine"]
|
| 137 |
+
embedded_model_name = app_config["embedded_model"]
|
| 138 |
+
embedded_model = os.path.join(os.getcwd(), "model", embedded_model_name)
|
| 139 |
+
embedded_dimension = app_config["embedded_dimension"]
|
| 140 |
+
use_py_session = app_config["use_py_session"]
|
| 141 |
+
trtLlm_debug_mode = app_config["trtLlm_debug_mode"]
|
| 142 |
+
add_special_tokens = app_config["add_special_tokens"]
|
| 143 |
+
verbose = app_config["verbose"]
|
| 144 |
+
|
| 145 |
+
# read model specific config
|
| 146 |
+
selected_model_name = None
|
| 147 |
+
selected_data_directory = None
|
| 148 |
+
config = read_config(model_config_file)
|
| 149 |
+
if os.path.exists(preference_config_file):
|
| 150 |
+
perf_config = read_config(preference_config_file)
|
| 151 |
+
selected_model_name = perf_config.get('models', {}).get('selected')
|
| 152 |
+
selected_data_directory = perf_config.get('dataset', {}).get('path')
|
| 153 |
+
|
| 154 |
+
# Fall back to the base config's selection when preferences did not
# provide one. Idiom fix: `is None` instead of `== None` (PEP 8).
if selected_model_name is None:
    selected_model_name = config["models"].get("selected")
|
| 156 |
+
|
| 157 |
+
if selected_model_name == "CLIP":
|
| 158 |
+
selected_CLIP = True
|
| 159 |
+
if selected_model_name == "ChatGLM 3 6B int4 (Supports Chinese)":
|
| 160 |
+
selected_ChatGLM = True
|
| 161 |
+
|
| 162 |
+
model_config = get_model_config(config, selected_model_name)
|
| 163 |
+
# Preference file wins over the base config; `is None` is the idiomatic None test.
data_dir = config["dataset"]["path"] if selected_data_directory is None else selected_data_directory
|
| 164 |
+
|
| 165 |
+
asr_model_name = "Whisper Medium Int8"
|
| 166 |
+
asr_model_config = get_asr_model_config(config, asr_model_name)
|
| 167 |
+
asr_engine_path = asr_model_config["model_path"]
|
| 168 |
+
asr_assets_path = asr_model_config["assets_path"]
|
| 169 |
+
|
| 170 |
+
whisper_model = None
|
| 171 |
+
whisper_model_loaded = False
|
| 172 |
+
enable_asr = config["models"]["enable_asr"]
|
| 173 |
+
nvmlInit()
|
| 174 |
+
|
| 175 |
+
def generate_inferance_engine(data, force_rewrite=False):
    """
    Initialize and return a FAISS-based inference engine.

    Args:
        data: The directory where the data for the inference engine is located.
        force_rewrite (bool): If True, force rewriting the index.

    Returns:
        The initialized inference engine (stored in the module-level `engine`).

    Raises:
        RuntimeError: If unable to generate the inference engine.
    """
    global engine
    try:
        faiss_storage = FaissEmbeddingStorage(data_dir=data,
                                              dimension=embedded_dimension)
        faiss_storage.initialize_index(force_rewrite=force_rewrite)
        engine = faiss_storage.get_engine(is_chat_engine=is_chat_engine, streaming=streaming,
                                          similarity_top_k=similarity_top_k)
    except Exception as e:
        # FIX: chain the original exception (`from e`) so the root cause and
        # its traceback stay visible instead of being flattened into a string.
        raise RuntimeError(f"Unable to generate the inference engine: {e}") from e
|
| 198 |
+
|
| 199 |
+
def generate_clip_engine(data_dir, model_path, clip_model, clip_processor, force_rewrite=False):
    """(Re)build the module-level CLIP image-search engine over *data_dir*."""
    global clip_engine
    clip_engine = CLIPEmbeddingStorageEngine(data_dir, model_path, clip_model, clip_processor)
    # Node creation and index construction take the same force flag.
    for step in (clip_engine.create_nodes, clip_engine.initialize_index):
        step(force_rewrite)
|
| 204 |
+
|
| 205 |
+
# Module-level model bootstrap. Exactly one stack is initialized at startup:
# the CLIP image-search stack (when the user selected "CLIP") or the
# TRT-LLM text stack plus embeddings, service context, and FAISS index.
llm = None
embed_model = None
service_context = None
clip_model = None
clip_processor = None

if selected_CLIP:
    # Initialize model and processor; CLIP inference runs on the GPU.
    clip_model = CLIPModel.from_pretrained(model_config["model_path"]).to('cuda')
    clip_processor = CLIPProcessor.from_pretrained(model_config["model_path"])
    generate_clip_engine(data_dir, model_config["model_path"], clip_model, clip_processor)
else:
    # create trt_llm engine object
    model_name, _ = read_model_name(model_config["model_path"])
    prompt_template_obj = LLMPromptTemplate()
    text_qa_template_str = prompt_template_obj.model_context_template(model_name)
    selected_completion_to_prompt = text_qa_template_str
    llm = TrtLlmAPI(
        model_path=model_config["model_path"],
        engine_name=model_config["engine"],
        tokenizer_dir=model_config["tokenizer_path"],
        temperature=model_config["temperature"],
        max_new_tokens=model_config["max_new_tokens"],
        context_window=model_config["max_input_token"],
        vocab_file=model_config["vocab"],
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=selected_completion_to_prompt,
        use_py_session=use_py_session,
        add_special_tokens=add_special_tokens,
        trtLlm_debug_mode=trtLlm_debug_mode,
        verbose=verbose
    )

    # create embeddings model object
    embed_model = HuggingFaceEmbeddings(model_name=embedded_model)
    service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model,
                                                   context_window=model_config["max_input_token"], chunk_size=512,
                                                   chunk_overlap=200)
    set_global_service_context(service_context)

    # load the vectorstore index (populates the module-level `engine`)
    generate_inferance_engine(data_dir)
|
| 247 |
+
|
| 248 |
+
def call_llm_streamed(query):
    """Stream a raw LLM completion for *query*, yielding the accumulated text so far."""
    accumulated = ""
    for token in llm.stream_complete(query, formatted=False):
        accumulated += token.delta
        yield accumulated
|
| 254 |
+
|
| 255 |
+
def chatbot(query, chat_history, session_id):
    """Non-streaming chat handler: yields one final HTML response string.

    Three modes depending on module-level state:
    - CLIP: answers with matching images copied into a Temp folder.
    - "nodataset": plain LLM completion, no retrieval.
    - directory dataset: RAG query with a reference-file link appended.
    """
    if selected_CLIP:
        # Timestamp namespaces this query's result folder under Temp_Images.
        ts = calendar.timegm(time.gmtime())
        temp_image_folder_name = "Temp/Temp_Images"
        # Best-effort wipe of results from previous queries.
        if os.path.isdir(temp_image_folder_name):
            try:
                shutil.rmtree(os.path.join(os.getcwd(), temp_image_folder_name))
            except Exception as e:
                print("Exception during folder delete", e)
        image_results_path = os.path.join(os.getcwd(), temp_image_folder_name, str(ts))
        res_im_paths = clip_engine.query(query, image_results_path)
        if len(res_im_paths) == 0:
            yield "No supported images found in the selected folder"
            torch.cuda.empty_cache()
            gc.collect()
            return

        # Build an HTML gallery of the top matches plus a "see all" link.
        div_start = '<div class="chat-output-images">'
        div_end = '</div>'
        im_elements = ''
        for i, im in enumerate(res_im_paths):
            if i>2 : break # display atmost 3 images.
            cur_data_link_src = temp_image_folder_name +"/" + str(ts) + "/" + os.path.basename(im)
            cur_src = "file/" + temp_image_folder_name +"/" + str(ts) + "/" + os.path.basename(im)
            im_elements += '<img data-link="{data_link_src}" src="{src}"/>'.format(src=cur_src, data_link_src=cur_data_link_src)
        full_div = (div_start + im_elements + div_end)
        folder_link = f'<a data-link="{image_results_path}">{"See all matches"}</a>'
        prefix = ""
        if(len(res_im_paths)>1):
            prefix = "Here are the top matching pictures from your dataset"
        else:
            prefix = "Here is the top matching picture from your dataset"
        response = prefix + "<br>"+ full_div + "<br>"+ folder_link

        # Free GPU memory after CLIP inference.
        gc.collect()
        torch.cuda.empty_cache()
        yield response
        torch.cuda.empty_cache()
        gc.collect()
        return

    if data_source == "nodataset":
        # No retrieval: answer directly from the LLM.
        yield llm.complete(query, formatted=False).text
        return

    if is_chat_engine:
        response = engine.chat(query)
    else:
        response = engine.query(query)

    # Track the source node with the lowest score that carries a filename.
    # NOTE(review): lower score is treated as the best match here — confirm
    # this matches the retriever's score semantics.
    lowest_score_file = None
    lowest_score = sys.float_info.max
    for node in response.source_nodes:
        metadata = node.metadata
        if 'filename' in metadata:
            if node.score < lowest_score:
                lowest_score = node.score
                lowest_score_file = metadata['filename']

    file_links = []
    seen_files = set()  # Set to track unique file names
    ts = calendar.timegm(time.gmtime())
    temp_docs_folder_name = "Temp/Temp_Docs"
    docs_path = os.path.join(os.getcwd(), temp_docs_folder_name, str(ts))
    os.makedirs(docs_path, exist_ok=True)

    # Generate links for the file with the highest aggregated score
    if lowest_score_file:
        # Copy the referenced document into the Temp folder so the UI can link it.
        abs_path = Path(os.path.join(os.getcwd(), lowest_score_file.replace('\\', '/')))
        file_name = os.path.basename(abs_path)
        doc_path = os.path.join(docs_path, file_name)
        shutil.copy(abs_path, doc_path)

        if file_name not in seen_files:  # Ensure the file hasn't already been processed
            if data_source == 'directory':
                file_link = f'<a data-link="{doc_path}">{file_name}</a>'
            else:
                exit("Wrong data_source type")
            file_links.append(file_link)
            seen_files.add(file_name)  # Mark file as processed

    response_txt = str(response)
    if file_links:
        response_txt += "<br>Reference files:<br>" + "<br>".join(file_links)
    if not lowest_score_file:  # If no file with a high score was found
        # Fall back to a plain completion when retrieval produced no usable source.
        response_txt = llm.complete(query).text
    yield response_txt
|
| 342 |
+
|
| 343 |
+
def stream_chatbot(query, chat_history, session_id):
    """Streaming chat handler: yields progressively longer response strings.

    Mirrors `chatbot` but streams tokens; CLIP and "nodataset" modes are
    handled first, then RAG streaming with a reference-file link appended
    at the end.
    """

    if selected_CLIP:
        # Timestamp namespaces this query's result folder under Temp_Images.
        ts = calendar.timegm(time.gmtime())
        temp_image_folder_name = "Temp/Temp_Images"
        # Best-effort wipe of results from previous queries.
        if os.path.isdir(temp_image_folder_name):
            try:
                shutil.rmtree(os.path.join(os.getcwd(), temp_image_folder_name))
            except Exception as e:
                print("Exception during folder delete", e)
        image_results_path = os.path.join(os.getcwd(), temp_image_folder_name, str(ts))
        res_im_paths = clip_engine.query(query, image_results_path)
        if len(res_im_paths) == 0:
            yield "No supported images found in the selected folder"
            torch.cuda.empty_cache()
            gc.collect()
            return
        # Build an HTML gallery of the top matches plus a "see all" link.
        div_start = '<div class="chat-output-images">'
        div_end = '</div>'
        im_elements = ''
        for i, im in enumerate(res_im_paths):
            if i>2 : break # display atmost 3 images.
            cur_data_link_src = temp_image_folder_name +"/" + str(ts) + "/" + os.path.basename(im)
            cur_src = "file/" + temp_image_folder_name +"/" + str(ts) + "/" + os.path.basename(im)
            im_elements += '<img data-link="{data_link_src}" src="{src}"/>'.format(src=cur_src, data_link_src=cur_data_link_src)
        full_div = (div_start + im_elements + div_end)
        folder_link = f'<a data-link="{image_results_path}">{"See all matches"}</a>'
        prefix = ""
        if(len(res_im_paths)>1):
            prefix = "Here are the top matching pictures from your dataset"
        else:
            prefix = "Here is the top matching picture from your dataset"
        response = prefix + "<br>"+ full_div + "<br>"+ folder_link
        yield response
        torch.cuda.empty_cache()
        gc.collect()
        return

    if data_source == "nodataset":
        # No retrieval: stream the raw LLM completion.
        for response in call_llm_streamed(query):
            yield response
        return

    if is_chat_engine:
        response = engine.stream_chat(query)
    else:
        response = engine.query(query)

    partial_response = ""
    if len(response.source_nodes) == 0:
        # Retrieval found nothing: fall back to streaming a plain completion.
        response = llm.stream_complete(query, formatted=False)
        for token in response:
            partial_response += token.delta
            yield partial_response
    else:
        # Aggregate scores by file
        # NOTE(review): lower score is treated as the best match — confirm
        # against the retriever's score semantics.
        lowest_score_file = None
        lowest_score = sys.float_info.max

        for node in response.source_nodes:
            if 'filename' in node.metadata:
                if node.score < lowest_score:
                    lowest_score = node.score
                    lowest_score_file = node.metadata['filename']

        file_links = []
        seen_files = set()
        # Stream tokens to the UI; the short sleeps pace the visual update.
        for token in response.response_gen:
            partial_response += token
            yield partial_response
            time.sleep(0.05)

        time.sleep(0.2)
        ts = calendar.timegm(time.gmtime())
        temp_docs_folder_name = "Temp/Temp_Docs"
        docs_path = os.path.join(os.getcwd(), temp_docs_folder_name, str(ts))
        os.makedirs(docs_path, exist_ok=True)

        if lowest_score_file:
            # Copy the referenced document into Temp so the UI can link it.
            abs_path = Path(os.path.join(os.getcwd(), lowest_score_file.replace('\\', '/')))
            file_name = os.path.basename(abs_path)
            doc_path = os.path.join(docs_path, file_name)
            shutil.copy(abs_path, doc_path)
            if file_name not in seen_files:  # Check if file_name is already seen
                if data_source == 'directory':
                    file_link = f'<a data-link="{doc_path}">{file_name}</a>'
                else:
                    exit("Wrong data_source type")
                file_links.append(file_link)
                seen_files.add(file_name)  # Add file_name to the set

        if file_links:
            partial_response += "<br>Reference files:<br>" + "<br>".join(file_links)
            yield partial_response

    # call garbage collector after inference
    torch.cuda.empty_cache()
    gc.collect()
|
| 441 |
+
|
| 442 |
+
|
| 443 |
+
interface = MainInterface(chatbot=stream_chatbot if streaming else chatbot, streaming=streaming)
|
| 444 |
+
|
| 445 |
+
|
| 446 |
+
def on_shutdown_handler(session_id):
    """Release loaded models and delete the Temp folder when the UI shuts down.

    Unloads Whisper, the LLM, and the CLIP stack (whichever are loaded),
    then removes the on-disk Temp artifacts and forces a GC cycle.
    """
    global llm, whisper_model, clip_model, clip_processor, clip_engine
    # FIX: removed the redundant local `import gc` — gc is already imported
    # at module level.
    if whisper_model is not None:
        whisper_model.unload_model()
        del whisper_model
        whisper_model = None
    if llm is not None:
        llm.unload_model()
        del llm
        llm = None
    if clip_model is not None:
        del clip_model
        del clip_processor
        del clip_engine
        clip_model = None
        clip_processor = None
        clip_engine = None
    # Best-effort cleanup of copied docs/images under Temp/.
    temp_data_folder_name = "Temp"
    if os.path.isdir(temp_data_folder_name):
        try:
            shutil.rmtree(os.path.join(os.getcwd(), temp_data_folder_name))
        except Exception as e:
            print("Exception during temp folder delete", e)
    # Force a garbage collection cycle
    gc.collect()
|
| 472 |
+
|
| 473 |
+
|
| 474 |
+
interface.on_shutdown(on_shutdown_handler)
|
| 475 |
+
|
| 476 |
+
|
| 477 |
+
def reset_chat_handler(session_id):
    """Clear the chat engine's conversation state; CLIP mode keeps none."""
    global faiss_storage
    global engine
    print('reset chat called', session_id)
    if selected_CLIP:
        return
    # Idiom fix: test truthiness directly instead of `== True`.
    if is_chat_engine:
        faiss_storage.reset_engine(engine)
|
| 485 |
+
|
| 486 |
+
|
| 487 |
+
interface.on_reset_chat(reset_chat_handler)
|
| 488 |
+
|
| 489 |
+
|
| 490 |
+
def on_dataset_path_updated_handler(source, new_directory, video_count, session_id):
    """Rebuild the active retrieval index when the dataset folder changes."""
    global engine
    global data_dir
    print('data set path updated to ', source, new_directory, video_count, session_id)
    if selected_CLIP:
        # CLIP mode always rebuilds against the new folder.
        data_dir = new_directory
        generate_clip_engine(data_dir, model_config["model_path"], clip_model, clip_processor)
        return
    # Text mode only rebuilds when the directory actually changed.
    if source == 'directory' and data_dir != new_directory:
        data_dir = new_directory
        generate_inferance_engine(data_dir)
|
| 502 |
+
|
| 503 |
+
|
| 504 |
+
interface.on_dataset_path_updated(on_dataset_path_updated_handler)
|
| 505 |
+
|
| 506 |
+
|
| 507 |
+
def on_model_change_handler(model, model_info, session_id):
    """Swap the active model when the user picks a different one in the UI.

    Unloads whichever stack is loaded (LLM or CLIP), frees GPU memory, then
    builds either the CLIP image-search engine or a fresh TRT-LLM pipeline
    with its embeddings, service context, and FAISS index.
    """
    global llm, embedded_model, engine, data_dir, service_context, clip_model, clip_processor, selected_CLIP, selected_model_name, embed_model, model_config, selected_ChatGLM, clip_engine
    selected_model_name = model
    selected_ChatGLM = False

    if llm is not None:
        llm.unload_model()
        del llm
        llm = None

    # Idiom fix: `is not None` / `is None` instead of `!=` / `==` with None.
    if clip_model is not None:
        del clip_model
        clip_model = None
        del clip_processor
        clip_processor = None
        del clip_engine
        clip_engine = None

    # Release GPU memory held by the previous model before loading the next.
    torch.cuda.empty_cache()
    gc.collect()

    cwd = os.getcwd()
    model_config = get_model_config(config, selected_model_name)

    selected_CLIP = False
    if selected_model_name == "CLIP":
        selected_CLIP = True
        if clip_model is None:
            clip_model = CLIPModel.from_pretrained(model_config["model_path"]).to('cuda')
            clip_processor = CLIPProcessor.from_pretrained(model_config["model_path"])
        generate_clip_engine(data_dir, model_config["model_path"], clip_model, clip_processor)
        return

    model_path = os.path.join(cwd, "model", model_info["id"], "engine") if "id" in model_info else None
    engine_name = model_info["metadata"].get('engine', None)

    if not model_path or not engine_name:
        print("Model path or engine not provided in metadata")
        return

    if selected_model_name == "ChatGLM 3 6B int4 (Supports Chinese)":
        selected_ChatGLM = True

    model_name, _ = read_model_name(model_path)
    prompt_template = LLMPromptTemplate()
    text_qa_template_str = prompt_template.model_context_template(model_name)
    selected_completion_to_prompt = text_qa_template_str

    #selected_completion_to_prompt = chatglm_completion_to_prompt if selected_ChatGLM else completion_to_prompt
    llm = TrtLlmAPI(
        model_path=model_path,
        engine_name=engine_name,
        tokenizer_dir=os.path.join(cwd, "model", model_info["id"], model_info["prerequisite"]["tokenizer_local_dir"]) if "tokenizer_local_dir" in model_info["prerequisite"] else None,
        temperature=model_info["metadata"].get("temperature"),
        max_new_tokens=model_info["metadata"].get("max_new_tokens"),
        context_window=model_info["metadata"].get("max_input_token"),
        vocab_file=os.path.join(cwd, "model", model_info["id"], model_info["prerequisite"]["vocab_local_dir"], model_info["prerequisite"]["tokenizer_files"]["vocab_file"]) if "vocab_local_dir" in model_info["prerequisite"] else None,
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=selected_completion_to_prompt,
        use_py_session=use_py_session,
        add_special_tokens=add_special_tokens,
        trtLlm_debug_mode=trtLlm_debug_mode,
        verbose=verbose
    )
    # Embeddings and service context are created once and then rebound to
    # the new LLM on subsequent model switches.
    if embed_model is None:
        embed_model = HuggingFaceEmbeddings(model_name=embedded_model)
    if service_context is None:
        service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model,
                                                       context_window=model_config["max_input_token"], chunk_size=512,
                                                       chunk_overlap=200)
    else:
        service_context = ServiceContext.from_service_context(service_context=service_context, llm=llm)
    set_global_service_context(service_context)
    generate_inferance_engine(data_dir)
|
| 580 |
+
|
| 581 |
+
|
| 582 |
+
interface.on_model_change(on_model_change_handler)
|
| 583 |
+
|
| 584 |
+
|
| 585 |
+
def on_dataset_source_change_handler(source, path, session_id):
|
| 586 |
+
|
| 587 |
+
global data_source, data_dir, engine
|
| 588 |
+
data_source = source
|
| 589 |
+
|
| 590 |
+
if data_source == "nodataset":
|
| 591 |
+
print(' No dataset source selected', session_id)
|
| 592 |
+
return
|
| 593 |
+
|
| 594 |
+
print('dataset source updated ', source, path, session_id)
|
| 595 |
+
|
| 596 |
+
if data_source == "directory":
|
| 597 |
+
data_dir = path
|
| 598 |
+
else:
|
| 599 |
+
print("Wrong data type selected")
|
| 600 |
+
generate_inferance_engine(data_dir)
|
| 601 |
+
|
| 602 |
+
|
| 603 |
+
interface.on_dataset_source_updated(on_dataset_source_change_handler)
|
| 604 |
+
|
| 605 |
+
def handle_regenerate_index(source, path, session_id):
|
| 606 |
+
if selected_CLIP:
|
| 607 |
+
generate_clip_engine(data_dir, model_config["model_path"], clip_model, clip_processor, force_rewrite=True)
|
| 608 |
+
else:
|
| 609 |
+
generate_inferance_engine(path, force_rewrite=True)
|
| 610 |
+
print("on regenerate index", source, path, session_id)
|
| 611 |
+
|
| 612 |
+
|
| 613 |
+
def mic_init_handler():
|
| 614 |
+
global whisper_model, whisper_model_loaded, enable_asr
|
| 615 |
+
enable_asr = config["models"]["enable_asr"]
|
| 616 |
+
if not enable_asr:
|
| 617 |
+
return False
|
| 618 |
+
vid_mem_info = nvmlDeviceGetMemoryInfo(nvmlDeviceGetHandleByIndex(0))
|
| 619 |
+
free_vid_mem = vid_mem_info.free / (1024*1024)
|
| 620 |
+
print("free video memory in MB = ", free_vid_mem)
|
| 621 |
+
if whisper_model is not None:
|
| 622 |
+
whisper_model.unload_model()
|
| 623 |
+
del whisper_model
|
| 624 |
+
whisper_model = None
|
| 625 |
+
whisper_model = WhisperTRTLLM(asr_engine_path, assets_dir=asr_assets_path)
|
| 626 |
+
whisper_model_loaded = True
|
| 627 |
+
return True
|
| 628 |
+
|
| 629 |
+
interface.on_mic_button_click(mic_init_handler)
|
| 630 |
+
|
| 631 |
+
def mic_recording_done_handler(audio_path):
|
| 632 |
+
transcription = ""
|
| 633 |
+
global whisper_model, enable_asr, whisper_model_loaded
|
| 634 |
+
if not enable_asr:
|
| 635 |
+
return ""
|
| 636 |
+
|
| 637 |
+
# Check and wait until model is loaded before running it.
|
| 638 |
+
checks_left_for_model_loading = 40
|
| 639 |
+
sleep_time = 0.2
|
| 640 |
+
while checks_left_for_model_loading>0 and not whisper_model_loaded:
|
| 641 |
+
time.sleep(sleep_time)
|
| 642 |
+
checks_left_for_model_loading -= 1
|
| 643 |
+
assert checks_left_for_model_loading>0, f"Whisper model loading not finished even after {(checks_left_for_model_loading*sleep_time)} seconds"
|
| 644 |
+
if checks_left_for_model_loading == 0:
|
| 645 |
+
return ""
|
| 646 |
+
|
| 647 |
+
# Covert the audio file into required sampling rate
|
| 648 |
+
current_sampling_rate, data = wavfile.read(audio_path)
|
| 649 |
+
new_sampling_rate = 16000
|
| 650 |
+
number_of_samples = round(len(data) * float(new_sampling_rate) / current_sampling_rate)
|
| 651 |
+
data = sps.resample(data, number_of_samples)
|
| 652 |
+
new_file_path = os.path.join( os.path.dirname(audio_path), "whisper_audio_input.wav" )
|
| 653 |
+
wavfile.write(new_file_path, new_sampling_rate, data.astype(np.int16))
|
| 654 |
+
language = "english"
|
| 655 |
+
if selected_ChatGLM: language = "chinese"
|
| 656 |
+
transcription = decode_audio_file( new_file_path, whisper_model, language=language, mel_filters_dir=asr_assets_path)
|
| 657 |
+
|
| 658 |
+
if whisper_model is not None:
|
| 659 |
+
whisper_model.unload_model()
|
| 660 |
+
del whisper_model
|
| 661 |
+
whisper_model = None
|
| 662 |
+
whisper_model_loaded = False
|
| 663 |
+
return transcription
|
| 664 |
+
|
| 665 |
+
interface.on_mic_recording_done(mic_recording_done_handler)
|
| 666 |
+
|
| 667 |
+
def model_download_handler(model_info):
|
| 668 |
+
download_path = os.path.join(os.getcwd(), "model")
|
| 669 |
+
status = download_model_by_name(model_info=model_info, download_path=download_path)
|
| 670 |
+
print(f"Model download status: {status}")
|
| 671 |
+
return status
|
| 672 |
+
|
| 673 |
+
interface.on_model_downloaded(model_download_handler)
|
| 674 |
+
|
| 675 |
+
def model_install_handler(model_info):
|
| 676 |
+
download_path = os.path.join(os.getcwd(), "model")
|
| 677 |
+
global llm, service_context
|
| 678 |
+
#unload the current model
|
| 679 |
+
if llm is not None:
|
| 680 |
+
llm.unload_model()
|
| 681 |
+
del llm
|
| 682 |
+
llm = None
|
| 683 |
+
# build the engine
|
| 684 |
+
status = build_engine_by_name(model_info=model_info , download_path= download_path)
|
| 685 |
+
print(f"Engine build status: {status}")
|
| 686 |
+
return status
|
| 687 |
+
|
| 688 |
+
interface.on_model_installed(model_install_handler)
|
| 689 |
+
|
| 690 |
+
def model_delete_handler(model_info):
|
| 691 |
+
print("Model deleting ", model_info)
|
| 692 |
+
model_dir = os.path.join(os.getcwd(), "model", model_info['id'])
|
| 693 |
+
isSuccess = True
|
| 694 |
+
if os.path.isdir(model_dir):
|
| 695 |
+
try:
|
| 696 |
+
shutil.rmtree(model_dir)
|
| 697 |
+
except Exception as e:
|
| 698 |
+
print("Exception during temp folder delete", e)
|
| 699 |
+
isSuccess = False
|
| 700 |
+
return isSuccess
|
| 701 |
+
|
| 702 |
+
interface.on_model_delete(model_delete_handler)
|
| 703 |
+
|
| 704 |
+
interface.on_regenerate_index(handle_regenerate_index)
|
| 705 |
+
# render the interface
|
| 706 |
+
interface.render()
|
deploy-docs.yml
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Deploy Docs to GitHub Pages
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- main
|
| 7 |
+
pull_request:
|
| 8 |
+
branches:
|
| 9 |
+
- main
|
| 10 |
+
|
| 11 |
+
jobs:
|
| 12 |
+
build:
|
| 13 |
+
name: Build Docusaurus
|
| 14 |
+
runs-on: ubuntu-latest
|
| 15 |
+
if: github.repository == 'OpenDevin/OpenDevin'
|
| 16 |
+
steps:
|
| 17 |
+
- uses: actions/checkout@v4
|
| 18 |
+
with:
|
| 19 |
+
fetch-depth: 0
|
| 20 |
+
- uses: actions/setup-node@v4
|
| 21 |
+
with:
|
| 22 |
+
node-version: 18
|
| 23 |
+
cache: npm
|
| 24 |
+
cache-dependency-path: docs/package-lock.json
|
| 25 |
+
- name: Set up Python
|
| 26 |
+
uses: actions/setup-python@v5
|
| 27 |
+
with:
|
| 28 |
+
python-version: "3.11"
|
| 29 |
+
|
| 30 |
+
- name: Generate Python Docs
|
| 31 |
+
run: rm -rf docs/modules/python && pip install pydoc-markdown && pydoc-markdown
|
| 32 |
+
- name: Install dependencies
|
| 33 |
+
run: cd docs && npm ci
|
| 34 |
+
- name: Build website
|
| 35 |
+
run: cd docs && npm run build
|
| 36 |
+
|
| 37 |
+
- name: Upload Build Artifact
|
| 38 |
+
if: github.ref == 'refs/heads/main'
|
| 39 |
+
uses: actions/upload-pages-artifact@v3
|
| 40 |
+
with:
|
| 41 |
+
path: docs/build
|
| 42 |
+
|
| 43 |
+
deploy:
|
| 44 |
+
name: Deploy to GitHub Pages
|
| 45 |
+
needs: build
|
| 46 |
+
if: github.ref == 'refs/heads/main' && github.repository == 'OpenDevin/OpenDevin'
|
| 47 |
+
# Grant GITHUB_TOKEN the permissions required to make a Pages deployment
|
| 48 |
+
permissions:
|
| 49 |
+
pages: write # to deploy to Pages
|
| 50 |
+
id-token: write # to verify the deployment originates from an appropriate source
|
| 51 |
+
# Deploy to the github-pages environment
|
| 52 |
+
environment:
|
| 53 |
+
name: github-pages
|
| 54 |
+
url: ${{ steps.deployment.outputs.page_url }}
|
| 55 |
+
runs-on: ubuntu-latest
|
| 56 |
+
steps:
|
| 57 |
+
- name: Deploy to GitHub Pages
|
| 58 |
+
id: deployment
|
| 59 |
+
uses: actions/deploy-pages@v4
|
dummy-agent-test.yml
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Run e2e test with dummy agent
|
| 2 |
+
|
| 3 |
+
concurrency:
|
| 4 |
+
group: ${{ github.workflow }}-${{ github.ref }}
|
| 5 |
+
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
| 6 |
+
|
| 7 |
+
on:
|
| 8 |
+
push:
|
| 9 |
+
branches:
|
| 10 |
+
- main
|
| 11 |
+
pull_request:
|
| 12 |
+
|
| 13 |
+
env:
|
| 14 |
+
PERSIST_SANDBOX : "false"
|
| 15 |
+
|
| 16 |
+
jobs:
|
| 17 |
+
test:
|
| 18 |
+
runs-on: ubuntu-latest
|
| 19 |
+
steps:
|
| 20 |
+
- uses: actions/checkout@v4
|
| 21 |
+
- name: Set up Python
|
| 22 |
+
uses: actions/setup-python@v5
|
| 23 |
+
with:
|
| 24 |
+
python-version: '3.11'
|
| 25 |
+
- name: Set up environment
|
| 26 |
+
run: |
|
| 27 |
+
curl -sSL https://install.python-poetry.org | python3 -
|
| 28 |
+
poetry install --without evaluation
|
| 29 |
+
poetry run playwright install --with-deps chromium
|
| 30 |
+
wget https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json -P /tmp/llama_index/models--BAAI--bge-small-en-v1.5/snapshots/5c38ec7c405ec4b44b94cc5a9bb96e735b38267a/1_Pooling/
|
| 31 |
+
- name: Run tests
|
| 32 |
+
run: |
|
| 33 |
+
poetry run python opendevin/core/main.py -t "do a flip" -m ollama/not-a-model -d ./workspace/ -c DummyAgent
|
ghcr.yml
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Publish Docker Image
|
| 2 |
+
|
| 3 |
+
concurrency:
|
| 4 |
+
group: ${{ github.workflow }}-${{ github.ref }}
|
| 5 |
+
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
| 6 |
+
|
| 7 |
+
on:
|
| 8 |
+
push:
|
| 9 |
+
branches:
|
| 10 |
+
- main
|
| 11 |
+
tags:
|
| 12 |
+
- '*'
|
| 13 |
+
pull_request:
|
| 14 |
+
workflow_dispatch:
|
| 15 |
+
inputs:
|
| 16 |
+
reason:
|
| 17 |
+
description: 'Reason for manual trigger'
|
| 18 |
+
required: true
|
| 19 |
+
default: ''
|
| 20 |
+
|
| 21 |
+
jobs:
|
| 22 |
+
ghcr_build_and_push:
|
| 23 |
+
runs-on: ubuntu-latest
|
| 24 |
+
|
| 25 |
+
permissions:
|
| 26 |
+
contents: read
|
| 27 |
+
packages: write
|
| 28 |
+
|
| 29 |
+
strategy:
|
| 30 |
+
matrix:
|
| 31 |
+
image: ["app", "sandbox"]
|
| 32 |
+
|
| 33 |
+
steps:
|
| 34 |
+
- name: checkout
|
| 35 |
+
uses: actions/checkout@v4
|
| 36 |
+
|
| 37 |
+
- name: Free Disk Space (Ubuntu)
|
| 38 |
+
uses: jlumbroso/free-disk-space@main
|
| 39 |
+
with:
|
| 40 |
+
# this might remove tools that are actually needed,
|
| 41 |
+
# if set to "true" but frees about 6 GB
|
| 42 |
+
tool-cache: true
|
| 43 |
+
|
| 44 |
+
# all of these default to true, but feel free to set to
|
| 45 |
+
# "false" if necessary for your workflow
|
| 46 |
+
android: true
|
| 47 |
+
dotnet: true
|
| 48 |
+
haskell: true
|
| 49 |
+
large-packages: true
|
| 50 |
+
docker-images: false
|
| 51 |
+
swap-storage: true
|
| 52 |
+
|
| 53 |
+
- name: Set up QEMU
|
| 54 |
+
uses: docker/setup-qemu-action@v3
|
| 55 |
+
|
| 56 |
+
- name: Set up Docker Buildx
|
| 57 |
+
id: buildx
|
| 58 |
+
uses: docker/setup-buildx-action@v3
|
| 59 |
+
|
| 60 |
+
- name: Login to ghcr
|
| 61 |
+
uses: docker/login-action@v1
|
| 62 |
+
with:
|
| 63 |
+
registry: ghcr.io
|
| 64 |
+
username: ${{ github.repository_owner }}
|
| 65 |
+
password: ${{ secrets.GITHUB_TOKEN }}
|
| 66 |
+
|
| 67 |
+
- name: Build and push ${{ matrix.image }}
|
| 68 |
+
if: "!github.event.pull_request.head.repo.fork"
|
| 69 |
+
run: |
|
| 70 |
+
./containers/build.sh ${{ matrix.image }} ${{ github.repository_owner }} --push
|
| 71 |
+
|
| 72 |
+
- name: Build ${{ matrix.image }}
|
| 73 |
+
if: "github.event.pull_request.head.repo.fork"
|
| 74 |
+
run: |
|
| 75 |
+
./containers/build.sh ${{ matrix.image }} ${{ github.repository_owner }}
|
| 76 |
+
|
| 77 |
+
docker_build_success:
|
| 78 |
+
name: Docker Build Success
|
| 79 |
+
runs-on: ubuntu-latest
|
| 80 |
+
needs: ghcr_build_and_push
|
| 81 |
+
steps:
|
| 82 |
+
- run: echo Done!
|
lint.yml
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Lint
|
| 2 |
+
|
| 3 |
+
concurrency:
|
| 4 |
+
group: ${{ github.workflow }}-${{ github.ref }}
|
| 5 |
+
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
| 6 |
+
|
| 7 |
+
on:
|
| 8 |
+
push:
|
| 9 |
+
branches:
|
| 10 |
+
- main
|
| 11 |
+
pull_request:
|
| 12 |
+
|
| 13 |
+
jobs:
|
| 14 |
+
lint-frontend:
|
| 15 |
+
name: Lint frontend
|
| 16 |
+
runs-on: ubuntu-latest
|
| 17 |
+
steps:
|
| 18 |
+
- uses: actions/checkout@v4
|
| 19 |
+
|
| 20 |
+
- name: Install Node.js 20
|
| 21 |
+
uses: actions/setup-node@v4
|
| 22 |
+
with:
|
| 23 |
+
node-version: 20
|
| 24 |
+
|
| 25 |
+
- name: Install dependencies
|
| 26 |
+
run: |
|
| 27 |
+
cd frontend
|
| 28 |
+
npm install --frozen-lockfile
|
| 29 |
+
|
| 30 |
+
- name: Lint
|
| 31 |
+
run: |
|
| 32 |
+
cd frontend
|
| 33 |
+
npm run lint
|
| 34 |
+
|
| 35 |
+
lint-python:
|
| 36 |
+
name: Lint python
|
| 37 |
+
runs-on: ubuntu-latest
|
| 38 |
+
steps:
|
| 39 |
+
- uses: actions/checkout@v4
|
| 40 |
+
with:
|
| 41 |
+
fetch-depth: 0
|
| 42 |
+
- name: Set up python
|
| 43 |
+
uses: actions/setup-python@v5
|
| 44 |
+
with:
|
| 45 |
+
python-version: 3.11
|
| 46 |
+
cache: 'pip'
|
| 47 |
+
- name: Install pre-commit
|
| 48 |
+
run: pip install pre-commit==3.7.0
|
| 49 |
+
- name: Run pre-commit hooks
|
| 50 |
+
if: github.ref != 'refs/heads/main'
|
| 51 |
+
run: |
|
| 52 |
+
git fetch https://github.com/OpenDevin/OpenDevin.git main:main && \
|
| 53 |
+
pre-commit run \
|
| 54 |
+
--files \
|
| 55 |
+
$(git diff --name-only $(git merge-base main $(git branch --show-current)) $(git branch --show-current) | tr '\n' ' ') \
|
| 56 |
+
--show-diff-on-failure \
|
| 57 |
+
--config ./dev_config/python/.pre-commit-config.yaml
|
review-pr.yml
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Use OpenDevin to Review Pull Request
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
pull_request:
|
| 5 |
+
types: [synchronize, labeled]
|
| 6 |
+
|
| 7 |
+
permissions:
|
| 8 |
+
contents: write
|
| 9 |
+
pull-requests: write
|
| 10 |
+
|
| 11 |
+
jobs:
|
| 12 |
+
dogfood:
|
| 13 |
+
if: contains(github.event.pull_request.labels.*.name, 'review-this')
|
| 14 |
+
runs-on: ubuntu-latest
|
| 15 |
+
container:
|
| 16 |
+
image: ghcr.io/opendevin/opendevin
|
| 17 |
+
volumes:
|
| 18 |
+
- /var/run/docker.sock:/var/run/docker.sock
|
| 19 |
+
|
| 20 |
+
steps:
|
| 21 |
+
- name: install git, github cli
|
| 22 |
+
run: |
|
| 23 |
+
apt-get install -y git gh
|
| 24 |
+
git config --global --add safe.directory $PWD
|
| 25 |
+
|
| 26 |
+
- name: Checkout Repository
|
| 27 |
+
uses: actions/checkout@v4
|
| 28 |
+
with:
|
| 29 |
+
ref: ${{ github.event.pull_request.base.ref }} # check out the target branch
|
| 30 |
+
|
| 31 |
+
- name: Download Diff
|
| 32 |
+
run: |
|
| 33 |
+
curl -O "${{ github.event.pull_request.diff_url }}" -L
|
| 34 |
+
|
| 35 |
+
- name: Write Task File
|
| 36 |
+
run: |
|
| 37 |
+
echo "Your coworker wants to apply a pull request to this project. Read and review ${{ github.event.pull_request.number }}.diff file. Create a review-${{ github.event.pull_request.number }}.txt and write your concise comments and suggestions there." > task.txt
|
| 38 |
+
echo "" >> task.txt
|
| 39 |
+
echo "Title" >> task.txt
|
| 40 |
+
echo "${{ github.event.pull_request.title }}" >> task.txt
|
| 41 |
+
echo "" >> task.txt
|
| 42 |
+
echo "Description" >> task.txt
|
| 43 |
+
echo "${{ github.event.pull_request.body }}" >> task.txt
|
| 44 |
+
echo "" >> task.txt
|
| 45 |
+
echo "Diff file is: ${{ github.event.pull_request.number }}.diff" >> task.txt
|
| 46 |
+
|
| 47 |
+
- name: Run OpenDevin
|
| 48 |
+
env:
|
| 49 |
+
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
| 50 |
+
SANDBOX_TYPE: exec
|
| 51 |
+
run: |
|
| 52 |
+
WORKSPACE_MOUNT_PATH=$GITHUB_WORKSPACE python ./opendevin/core/main.py -i 50 -f task.txt -d $GITHUB_WORKSPACE
|
| 53 |
+
rm task.txt
|
| 54 |
+
|
| 55 |
+
- name: Check if review file is non-empty
|
| 56 |
+
id: check_file
|
| 57 |
+
run: |
|
| 58 |
+
ls -la
|
| 59 |
+
if [[ -s review-${{ github.event.pull_request.number }}.txt ]]; then
|
| 60 |
+
echo "non_empty=true" >> $GITHUB_OUTPUT
|
| 61 |
+
fi
|
| 62 |
+
shell: bash
|
| 63 |
+
|
| 64 |
+
- name: Create PR review if file is non-empty
|
| 65 |
+
env:
|
| 66 |
+
GH_TOKEN: ${{ github.token }}
|
| 67 |
+
if: steps.check_file.outputs.non_empty == 'true'
|
| 68 |
+
run: |
|
| 69 |
+
gh pr review ${{ github.event.pull_request.number }} --comment --body-file "review-${{ github.event.pull_request.number }}.txt"
|
run-integration-tests.yml
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Run Integration Tests
|
| 2 |
+
|
| 3 |
+
concurrency:
|
| 4 |
+
group: ${{ github.workflow }}-${{ github.ref }}
|
| 5 |
+
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
| 6 |
+
|
| 7 |
+
on:
|
| 8 |
+
push:
|
| 9 |
+
branches:
|
| 10 |
+
- main
|
| 11 |
+
paths-ignore:
|
| 12 |
+
- '**/*.md'
|
| 13 |
+
- 'frontend/**'
|
| 14 |
+
- 'docs/**'
|
| 15 |
+
- 'evaluation/**'
|
| 16 |
+
pull_request:
|
| 17 |
+
|
| 18 |
+
env:
|
| 19 |
+
PERSIST_SANDBOX : "false"
|
| 20 |
+
|
| 21 |
+
jobs:
|
| 22 |
+
integration-tests-on-linux:
|
| 23 |
+
name: Integration Tests on Linux
|
| 24 |
+
runs-on: ubuntu-latest
|
| 25 |
+
strategy:
|
| 26 |
+
fail-fast: false
|
| 27 |
+
matrix:
|
| 28 |
+
python-version: ["3.11"]
|
| 29 |
+
sandbox: ["ssh", "exec", "local"]
|
| 30 |
+
steps:
|
| 31 |
+
- uses: actions/checkout@v4
|
| 32 |
+
|
| 33 |
+
- name: Install poetry via pipx
|
| 34 |
+
run: pipx install poetry
|
| 35 |
+
|
| 36 |
+
- name: Set up Python
|
| 37 |
+
uses: actions/setup-python@v5
|
| 38 |
+
with:
|
| 39 |
+
python-version: ${{ matrix.python-version }}
|
| 40 |
+
cache: 'poetry'
|
| 41 |
+
|
| 42 |
+
- name: Install Python dependencies using Poetry
|
| 43 |
+
run: poetry install
|
| 44 |
+
|
| 45 |
+
- name: Build Environment
|
| 46 |
+
run: make build
|
| 47 |
+
|
| 48 |
+
- name: Run Integration Tests
|
| 49 |
+
env:
|
| 50 |
+
SANDBOX_TYPE: ${{ matrix.sandbox }}
|
| 51 |
+
run: |
|
| 52 |
+
TEST_IN_CI=true TEST_ONLY=true ./tests/integration/regenerate.sh
|
| 53 |
+
|
| 54 |
+
- name: Upload coverage to Codecov
|
| 55 |
+
uses: codecov/codecov-action@v4
|
| 56 |
+
env:
|
| 57 |
+
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
| 58 |
+
|
| 59 |
+
integration-tests-on-mac:
|
| 60 |
+
name: Integration Tests on MacOS
|
| 61 |
+
runs-on: macos-13
|
| 62 |
+
if: contains(github.event.pull_request.title, 'mac') || contains(github.event.pull_request.title, 'Mac')
|
| 63 |
+
strategy:
|
| 64 |
+
fail-fast: false
|
| 65 |
+
matrix:
|
| 66 |
+
python-version: ["3.11"]
|
| 67 |
+
sandbox: ["ssh"]
|
| 68 |
+
steps:
|
| 69 |
+
- uses: actions/checkout@v4
|
| 70 |
+
|
| 71 |
+
- name: Install poetry via pipx
|
| 72 |
+
run: pipx install poetry
|
| 73 |
+
|
| 74 |
+
- name: Set up Python
|
| 75 |
+
uses: actions/setup-python@v5
|
| 76 |
+
with:
|
| 77 |
+
python-version: ${{ matrix.python-version }}
|
| 78 |
+
cache: 'poetry'
|
| 79 |
+
|
| 80 |
+
- name: Install Python dependencies using Poetry
|
| 81 |
+
run: poetry install
|
| 82 |
+
|
| 83 |
+
- name: Install & Start Docker
|
| 84 |
+
run: |
|
| 85 |
+
brew install colima docker
|
| 86 |
+
colima start
|
| 87 |
+
|
| 88 |
+
# For testcontainers to find the Colima socket
|
| 89 |
+
# https://github.com/abiosoft/colima/blob/main/docs/FAQ.md#cannot-connect-to-the-docker-daemon-at-unixvarrundockersock-is-the-docker-daemon-running
|
| 90 |
+
sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock
|
| 91 |
+
|
| 92 |
+
- name: Build Environment
|
| 93 |
+
run: make build
|
| 94 |
+
|
| 95 |
+
- name: Run Integration Tests
|
| 96 |
+
env:
|
| 97 |
+
SANDBOX_TYPE: ${{ matrix.sandbox }}
|
| 98 |
+
run: |
|
| 99 |
+
TEST_IN_CI=true TEST_ONLY=true ./tests/integration/regenerate.sh
|
| 100 |
+
|
| 101 |
+
- name: Upload coverage to Codecov
|
| 102 |
+
uses: codecov/codecov-action@v4
|
| 103 |
+
env:
|
| 104 |
+
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
run-unit-tests.yml
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Run Unit Tests
|
| 2 |
+
|
| 3 |
+
concurrency:
|
| 4 |
+
group: ${{ github.workflow }}-${{ github.ref }}
|
| 5 |
+
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
| 6 |
+
|
| 7 |
+
on:
|
| 8 |
+
push:
|
| 9 |
+
branches:
|
| 10 |
+
- main
|
| 11 |
+
paths-ignore:
|
| 12 |
+
- '**/*.md'
|
| 13 |
+
- 'frontend/**'
|
| 14 |
+
- 'docs/**'
|
| 15 |
+
- 'evaluation/**'
|
| 16 |
+
pull_request:
|
| 17 |
+
|
| 18 |
+
env:
|
| 19 |
+
PERSIST_SANDBOX : "false"
|
| 20 |
+
|
| 21 |
+
jobs:
|
| 22 |
+
test-on-macos:
|
| 23 |
+
name: Test on macOS
|
| 24 |
+
runs-on: macos-13
|
| 25 |
+
env:
|
| 26 |
+
INSTALL_DOCKER: "0" # Set to '0' to skip Docker installation
|
| 27 |
+
strategy:
|
| 28 |
+
matrix:
|
| 29 |
+
python-version: ["3.11"]
|
| 30 |
+
|
| 31 |
+
steps:
|
| 32 |
+
- uses: actions/checkout@v4
|
| 33 |
+
|
| 34 |
+
- name: Install poetry via pipx
|
| 35 |
+
run: pipx install poetry
|
| 36 |
+
|
| 37 |
+
- name: Set up Python ${{ matrix.python-version }}
|
| 38 |
+
uses: actions/setup-python@v5
|
| 39 |
+
with:
|
| 40 |
+
python-version: ${{ matrix.python-version }}
|
| 41 |
+
cache: "poetry"
|
| 42 |
+
|
| 43 |
+
- name: Install Python dependencies using Poetry
|
| 44 |
+
run: poetry install
|
| 45 |
+
|
| 46 |
+
- name: Install & Start Docker
|
| 47 |
+
if: env.INSTALL_DOCKER == '1'
|
| 48 |
+
run: |
|
| 49 |
+
brew install colima docker
|
| 50 |
+
colima start
|
| 51 |
+
|
| 52 |
+
# For testcontainers to find the Colima socket
|
| 53 |
+
# https://github.com/abiosoft/colima/blob/main/docs/FAQ.md#cannot-connect-to-the-docker-daemon-at-unixvarrundockersock-is-the-docker-daemon-running
|
| 54 |
+
sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock
|
| 55 |
+
|
| 56 |
+
- name: Build Environment
|
| 57 |
+
run: make build
|
| 58 |
+
|
| 59 |
+
- name: Run Tests
|
| 60 |
+
run: poetry run pytest --forked --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit -k "not test_sandbox"
|
| 61 |
+
|
| 62 |
+
- name: Upload coverage to Codecov
|
| 63 |
+
uses: codecov/codecov-action@v4
|
| 64 |
+
env:
|
| 65 |
+
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
| 66 |
+
test-on-linux:
|
| 67 |
+
name: Test on Linux
|
| 68 |
+
runs-on: ubuntu-latest
|
| 69 |
+
env:
|
| 70 |
+
INSTALL_DOCKER: "0" # Set to '0' to skip Docker installation
|
| 71 |
+
strategy:
|
| 72 |
+
matrix:
|
| 73 |
+
python-version: ["3.11"]
|
| 74 |
+
|
| 75 |
+
steps:
|
| 76 |
+
- uses: actions/checkout@v4
|
| 77 |
+
|
| 78 |
+
- name: Install poetry via pipx
|
| 79 |
+
run: pipx install poetry
|
| 80 |
+
|
| 81 |
+
- name: Set up Python
|
| 82 |
+
uses: actions/setup-python@v5
|
| 83 |
+
with:
|
| 84 |
+
python-version: ${{ matrix.python-version }}
|
| 85 |
+
cache: "poetry"
|
| 86 |
+
|
| 87 |
+
- name: Install Python dependencies using Poetry
|
| 88 |
+
run: poetry install --without evaluation
|
| 89 |
+
|
| 90 |
+
- name: Build Environment
|
| 91 |
+
run: make build
|
| 92 |
+
|
| 93 |
+
- name: Run Tests
|
| 94 |
+
run: poetry run pytest --forked --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit -k "not test_sandbox"
|
| 95 |
+
|
| 96 |
+
- name: Upload coverage to Codecov
|
| 97 |
+
uses: codecov/codecov-action@v4
|
| 98 |
+
env:
|
| 99 |
+
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
| 100 |
+
|
| 101 |
+
test-for-sandbox:
|
| 102 |
+
name: Test for Sandbox
|
| 103 |
+
runs-on: ubuntu-latest
|
| 104 |
+
steps:
|
| 105 |
+
- uses: actions/checkout@v4
|
| 106 |
+
|
| 107 |
+
- name: Install poetry via pipx
|
| 108 |
+
run: pipx install poetry
|
| 109 |
+
|
| 110 |
+
- name: Set up Python
|
| 111 |
+
uses: actions/setup-python@v5
|
| 112 |
+
with:
|
| 113 |
+
python-version: "3.11"
|
| 114 |
+
cache: "poetry"
|
| 115 |
+
|
| 116 |
+
- name: Install Python dependencies using Poetry
|
| 117 |
+
run: poetry install
|
| 118 |
+
|
| 119 |
+
- name: Build Environment
|
| 120 |
+
run: make build
|
| 121 |
+
|
| 122 |
+
- name: Run Integration Test for Sandbox
|
| 123 |
+
run: |
|
| 124 |
+
poetry run pytest --cov=agenthub --cov=opendevin --cov-report=xml -s ./tests/unit/test_sandbox.py
|
| 125 |
+
|
| 126 |
+
- name: Upload coverage to Codecov
|
| 127 |
+
uses: codecov/codecov-action@v4
|
| 128 |
+
env:
|
| 129 |
+
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
solve-issue.yml
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Use OpenDevin to Resolve GitHub Issue
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
issues:
|
| 5 |
+
types: [labeled]
|
| 6 |
+
|
| 7 |
+
permissions:
|
| 8 |
+
contents: write
|
| 9 |
+
pull-requests: write
|
| 10 |
+
issues: write
|
| 11 |
+
|
| 12 |
+
jobs:
|
| 13 |
+
dogfood:
|
| 14 |
+
if: github.event.label.name == 'solve-this'
|
| 15 |
+
runs-on: ubuntu-latest
|
| 16 |
+
container:
|
| 17 |
+
image: ghcr.io/opendevin/opendevin
|
| 18 |
+
volumes:
|
| 19 |
+
- /var/run/docker.sock:/var/run/docker.sock
|
| 20 |
+
|
| 21 |
+
steps:
|
| 22 |
+
- name: install git, github cli
|
| 23 |
+
run: apt-get install -y git gh
|
| 24 |
+
|
| 25 |
+
- name: Checkout Repository
|
| 26 |
+
uses: actions/checkout@v4
|
| 27 |
+
|
| 28 |
+
- name: Write Task File
|
| 29 |
+
env:
|
| 30 |
+
ISSUE_TITLE: ${{ github.event.issue.title }}
|
| 31 |
+
ISSUE_BODY: ${{ github.event.issue.body }}
|
| 32 |
+
run: |
|
| 33 |
+
echo "TITLE:" > task.txt
|
| 34 |
+
echo "${ISSUE_TITLE}" >> task.txt
|
| 35 |
+
echo "" >> task.txt
|
| 36 |
+
echo "BODY:" >> task.txt
|
| 37 |
+
echo "${ISSUE_BODY}" >> task.txt
|
| 38 |
+
|
| 39 |
+
- name: Run OpenDevin
|
| 40 |
+
env:
|
| 41 |
+
ISSUE_TITLE: ${{ github.event.issue.title }}
|
| 42 |
+
ISSUE_BODY: ${{ github.event.issue.body }}
|
| 43 |
+
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
| 44 |
+
SANDBOX_TYPE: exec
|
| 45 |
+
run: |
|
| 46 |
+
WORKSPACE_MOUNT_PATH=$GITHUB_WORKSPACE python ./opendevin/core/main.py -i 50 -f task.txt -d $GITHUB_WORKSPACE
|
| 47 |
+
rm task.txt
|
| 48 |
+
|
| 49 |
+
- name: Setup Git, Create Branch, and Commit Changes
|
| 50 |
+
run: |
|
| 51 |
+
# Setup Git configuration
|
| 52 |
+
git config --global --add safe.directory $PWD
|
| 53 |
+
git config --global user.name 'OpenDevin'
|
| 54 |
+
git config --global user.email 'OpenDevin@users.noreply.github.com'
|
| 55 |
+
|
| 56 |
+
# Create a unique branch name with a timestamp
|
| 57 |
+
BRANCH_NAME="fix/${{ github.event.issue.number }}-$(date +%Y%m%d%H%M%S)"
|
| 58 |
+
|
| 59 |
+
# Checkout new branch
|
| 60 |
+
git checkout -b $BRANCH_NAME
|
| 61 |
+
|
| 62 |
+
# Add all changes to staging, except task.txt
|
| 63 |
+
git add --all -- ':!task.txt'
|
| 64 |
+
|
| 65 |
+
# Commit the changes, if any
|
| 66 |
+
git commit -m "OpenDevin: Resolve Issue #${{ github.event.issue.number }}"
|
| 67 |
+
if [ $? -ne 0 ]; then
|
| 68 |
+
echo "No changes to commit."
|
| 69 |
+
exit 0
|
| 70 |
+
fi
|
| 71 |
+
|
| 72 |
+
# Push changes
|
| 73 |
+
git push --set-upstream origin $BRANCH_NAME
|
| 74 |
+
|
| 75 |
+
- name: Fetch Default Branch
|
| 76 |
+
env:
|
| 77 |
+
GH_TOKEN: ${{ github.token }}
|
| 78 |
+
run: |
|
| 79 |
+
# Fetch the default branch using gh cli
|
| 80 |
+
DEFAULT_BRANCH=$(gh repo view --json defaultBranchRef --jq .defaultBranchRef.name)
|
| 81 |
+
echo "Default branch is $DEFAULT_BRANCH"
|
| 82 |
+
echo "DEFAULT_BRANCH=$DEFAULT_BRANCH" >> $GITHUB_ENV
|
| 83 |
+
|
| 84 |
+
- name: Generate PR
|
| 85 |
+
env:
|
| 86 |
+
GH_TOKEN: ${{ github.token }}
|
| 87 |
+
run: |
|
| 88 |
+
# Create PR and capture URL
|
| 89 |
+
PR_URL=$(gh pr create \
|
| 90 |
+
--title "OpenDevin: Resolve Issue #2" \
|
| 91 |
+
--body "This PR was generated by OpenDevin to resolve issue #2" \
|
| 92 |
+
--repo "foragerr/OpenDevin" \
|
| 93 |
+
--head "${{ github.head_ref }}" \
|
| 94 |
+
--base "${{ env.DEFAULT_BRANCH }}" \
|
| 95 |
+
| grep -o 'https://github.com/[^ ]*')
|
| 96 |
+
|
| 97 |
+
# Extract PR number from URL
|
| 98 |
+
PR_NUMBER=$(echo "$PR_URL" | grep -o '[0-9]\+$')
|
| 99 |
+
|
| 100 |
+
# Set environment vars
|
| 101 |
+
echo "PR_URL=$PR_URL" >> $GITHUB_ENV
|
| 102 |
+
echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV
|
| 103 |
+
|
| 104 |
+
- name: Post Comment
|
| 105 |
+
env:
|
| 106 |
+
GH_TOKEN: ${{ github.token }}
|
| 107 |
+
run: |
|
| 108 |
+
gh issue comment ${{ github.event.issue.number }} \
|
| 109 |
+
-b "OpenDevin raised [PR #${{ env.PR_NUMBER }}](${{ env.PR_URL }}) to resolve this issue."
|
stale.yml
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: 'Close stale issues'
|
| 2 |
+
on:
|
| 3 |
+
schedule:
|
| 4 |
+
- cron: '30 1 * * *'
|
| 5 |
+
|
| 6 |
+
jobs:
|
| 7 |
+
stale:
|
| 8 |
+
runs-on: ubuntu-latest
|
| 9 |
+
steps:
|
| 10 |
+
- uses: actions/stale@v9
|
| 11 |
+
with:
|
| 12 |
+
# Aggressively close issues that have been explicitly labeled `age-out`
|
| 13 |
+
any-of-labels: age-out
|
| 14 |
+
stale-issue-message: 'This issue is stale because it has been open for 7 days with no activity. Remove stale label or comment or this will be closed in 1 day.'
|
| 15 |
+
close-issue-message: 'This issue was closed because it has been stalled for over 7 days with no activity.'
|
| 16 |
+
stale-pr-message: 'This PR is stale because it has been open for 7 days with no activity. Remove stale label or comment or this will be closed in 1 days.'
|
| 17 |
+
close-pr-message: 'This PR was closed because it has been stalled for over 7 days with no activity.'
|
| 18 |
+
days-before-stale: 7
|
| 19 |
+
days-before-close: 1
|
| 20 |
+
|
| 21 |
+
- uses: actions/stale@v9
|
| 22 |
+
with:
|
| 23 |
+
# Be more lenient with other issues
|
| 24 |
+
stale-issue-message: 'This issue is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.'
|
| 25 |
+
close-issue-message: 'This issue was closed because it has been stalled for over 30 days with no activity.'
|
| 26 |
+
stale-pr-message: 'This PR is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.'
|
| 27 |
+
close-pr-message: 'This PR was closed because it has been stalled for over 30 days with no activity.'
|
| 28 |
+
days-before-stale: 30
|
| 29 |
+
days-before-close: 7
|