ai: Refactor the code for JARVIS multi-platform.
Browse files
jarvis.py
CHANGED
|
@@ -26,23 +26,27 @@ os.system("apt-get update -q -y && apt-get install -q -y tesseract-ocr tesseract
|
|
| 26 |
INTERNAL_AI_GET_SERVER = os.getenv("INTERNAL_AI_GET_SERVER")
|
| 27 |
INTERNAL_TRAINING_DATA = os.getenv("INTERNAL_TRAINING_DATA")
|
| 28 |
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
| 30 |
LINUX_SERVER_HOSTS_MARKED = set()
|
| 31 |
LINUX_SERVER_HOSTS_ATTEMPTS = {}
|
| 32 |
|
| 33 |
-
LINUX_SERVER_PROVIDER_KEYS = [
|
| 34 |
LINUX_SERVER_PROVIDER_KEYS_MARKED = set()
|
| 35 |
LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS = {}
|
| 36 |
|
| 37 |
-
LINUX_SERVER_ERRORS = set(map(int, os.getenv("LINUX_SERVER_ERROR").split(",")))
|
| 38 |
|
| 39 |
-
AI_TYPES = {f"AI_TYPE_{i}": os.getenv(f"AI_TYPE_{i}") for i in range(1,
|
| 40 |
RESPONSES = {f"RESPONSE_{i}": os.getenv(f"RESPONSE_{i}") for i in range(1, 10)}
|
| 41 |
|
| 42 |
MODEL_MAPPING = json.loads(os.getenv("MODEL_MAPPING", "{}"))
|
| 43 |
MODEL_CONFIG = json.loads(os.getenv("MODEL_CONFIG", "{}"))
|
| 44 |
-
MODEL_CHOICES = list(MODEL_MAPPING.values())
|
| 45 |
DEFAULT_CONFIG = json.loads(os.getenv("DEFAULT_CONFIG", "{}"))
|
|
|
|
| 46 |
|
| 47 |
META_TAGS = os.getenv("META_TAGS")
|
| 48 |
|
|
@@ -50,157 +54,152 @@ ALLOWED_EXTENSIONS = json.loads(os.getenv("ALLOWED_EXTENSIONS", "[]"))
|
|
| 50 |
|
| 51 |
ACTIVE_CANDIDATE = None
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
def get_available_items(items, marked):
|
| 54 |
-
|
| 55 |
-
random.shuffle(
|
| 56 |
-
return
|
| 57 |
|
| 58 |
def marked_item(item, marked, attempts):
|
| 59 |
marked.add(item)
|
| 60 |
attempts[item] = attempts.get(item, 0) + 1
|
| 61 |
if attempts[item] >= 3:
|
| 62 |
-
def
|
| 63 |
marked.discard(item)
|
| 64 |
attempts.pop(item, None)
|
| 65 |
-
threading.Timer(300,
|
| 66 |
-
|
| 67 |
-
class SessionWithID(requests.Session):
|
| 68 |
-
def __init__(self):
|
| 69 |
-
super().__init__()
|
| 70 |
-
self.session_id = str(uuid.uuid4())
|
| 71 |
-
|
| 72 |
-
def create_session():
|
| 73 |
-
return SessionWithID()
|
| 74 |
|
| 75 |
-
def get_model_key(
|
| 76 |
-
return next((k for k, v in MODEL_MAPPING.items() if v ==
|
| 77 |
|
| 78 |
-
def extract_file_content(
|
| 79 |
-
ext = Path(
|
| 80 |
-
|
| 81 |
try:
|
| 82 |
if ext == ".pdf":
|
| 83 |
-
with pdfplumber.open(
|
| 84 |
-
for
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
content += text + "\n"
|
| 88 |
-
for table in page.extract_tables():
|
| 89 |
-
table_str = "\n".join([", ".join(row) for row in table if row])
|
| 90 |
-
content += "\n" + table_str + "\n"
|
| 91 |
elif ext in [".doc", ".docx"]:
|
| 92 |
-
|
| 93 |
-
for para in
|
| 94 |
-
|
| 95 |
elif ext in [".xlsx", ".xls"]:
|
| 96 |
-
df = pd.read_excel(
|
| 97 |
-
|
| 98 |
elif ext in [".ppt", ".pptx"]:
|
| 99 |
-
prs = Presentation(
|
| 100 |
-
for
|
| 101 |
-
for
|
| 102 |
-
if hasattr(
|
| 103 |
-
|
| 104 |
-
elif ext in [".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".webp"]:
|
| 105 |
-
pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
|
| 106 |
-
image = Image.open(file_path)
|
| 107 |
-
content += pytesseract.image_to_string(image) + "\n"
|
| 108 |
else:
|
| 109 |
-
|
| 110 |
except Exception as e:
|
| 111 |
-
|
| 112 |
-
return
|
| 113 |
|
| 114 |
-
async def fetch_response_async(host,
|
| 115 |
-
|
| 116 |
-
for timeout in timeouts:
|
| 117 |
try:
|
| 118 |
-
async with httpx.AsyncClient(timeout=
|
| 119 |
-
|
| 120 |
-
if
|
| 121 |
-
marked_item(
|
| 122 |
return None
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
if isinstance(
|
| 126 |
-
|
| 127 |
-
if
|
| 128 |
-
return
|
| 129 |
return None
|
| 130 |
-
except
|
| 131 |
continue
|
| 132 |
-
marked_item(
|
| 133 |
return None
|
| 134 |
|
| 135 |
-
async def chat_with_model_async(history, user_input,
|
| 136 |
-
if not get_available_items(LINUX_SERVER_PROVIDER_KEYS, LINUX_SERVER_PROVIDER_KEYS_MARKED) or not get_available_items(LINUX_SERVER_HOSTS,
|
| 137 |
return RESPONSES["RESPONSE_3"]
|
| 138 |
if not hasattr(sess, "session_id"):
|
| 139 |
sess.session_id = str(uuid.uuid4())
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
if
|
| 144 |
-
|
| 145 |
-
|
|
|
|
|
|
|
|
|
|
| 146 |
global ACTIVE_CANDIDATE
|
| 147 |
if ACTIVE_CANDIDATE:
|
| 148 |
-
|
| 149 |
-
if
|
| 150 |
-
return
|
| 151 |
ACTIVE_CANDIDATE = None
|
| 152 |
keys = get_available_items(LINUX_SERVER_PROVIDER_KEYS, LINUX_SERVER_PROVIDER_KEYS_MARKED)
|
| 153 |
-
hosts = get_available_items(LINUX_SERVER_HOSTS,
|
| 154 |
-
|
| 155 |
-
random.shuffle(
|
| 156 |
-
for
|
| 157 |
-
|
| 158 |
-
if
|
| 159 |
-
ACTIVE_CANDIDATE = (
|
| 160 |
-
return
|
| 161 |
return RESPONSES["RESPONSE_2"]
|
| 162 |
|
| 163 |
-
async def respond_async(
|
| 164 |
-
|
| 165 |
-
if not
|
| 166 |
yield history, gr.MultimodalTextbox(value=None, interactive=True), sess
|
| 167 |
return
|
| 168 |
-
|
| 169 |
-
for
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
if
|
| 173 |
-
|
| 174 |
-
history.append([
|
| 175 |
-
|
| 176 |
history[-1][1] = ""
|
| 177 |
-
def
|
| 178 |
-
if isinstance(
|
| 179 |
-
|
| 180 |
-
if isinstance(
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
return json.dumps(data, ensure_ascii=False)
|
| 186 |
-
return repr(data)
|
| 187 |
-
for character in ai_response:
|
| 188 |
-
history[-1][1] += convert_to_string(character)
|
| 189 |
await asyncio.sleep(0.0001)
|
| 190 |
yield history, gr.MultimodalTextbox(value=None, interactive=True), sess
|
| 191 |
|
| 192 |
-
def change_model(
|
| 193 |
-
|
|
|
|
|
|
|
| 194 |
|
| 195 |
with gr.Blocks(fill_height=True, fill_width=True, title=AI_TYPES["AI_TYPE_4"], head=META_TAGS) as jarvis:
|
| 196 |
user_history = gr.State([])
|
| 197 |
user_session = gr.State(create_session())
|
| 198 |
selected_model = gr.State(MODEL_CHOICES[0] if MODEL_CHOICES else "")
|
|
|
|
| 199 |
chatbot = gr.Chatbot(label=AI_TYPES["AI_TYPE_1"], show_copy_button=True, scale=1, elem_id=AI_TYPES["AI_TYPE_2"])
|
| 200 |
with gr.Row():
|
| 201 |
msg = gr.MultimodalTextbox(show_label=False, placeholder=RESPONSES["RESPONSE_5"], interactive=True, file_count="single", file_types=ALLOWED_EXTENSIONS)
|
| 202 |
with gr.Accordion(AI_TYPES["AI_TYPE_6"], open=False):
|
| 203 |
model_dropdown = gr.Dropdown(show_label=False, choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
|
| 204 |
-
|
| 205 |
-
|
|
|
|
|
|
|
| 206 |
jarvis.launch(max_file_size="1mb")
|
|
|
|
| 26 |
# Backend endpoint name and internal training payload, injected via environment.
INTERNAL_AI_GET_SERVER = os.getenv("INTERNAL_AI_GET_SERVER")
INTERNAL_TRAINING_DATA = os.getenv("INTERNAL_TRAINING_DATA")

# Per-model system prompts; DEFAULT_SYSTEM is the fallback prompt.
SYSTEM_PROMPT_MAPPING = json.loads(os.getenv("SYSTEM_PROMPT_MAPPING", "{}"))
SYSTEM_PROMPT_DEFAULT = os.getenv("DEFAULT_SYSTEM")

# Backend hosts plus runtime health bookkeeping:
# *_MARKED   = items currently excluded after failures
# *_ATTEMPTS = consecutive failure counts (see marked_item)
LINUX_SERVER_HOSTS = [h for h in json.loads(os.getenv("LINUX_SERVER_HOST", "[]")) if h]
LINUX_SERVER_HOSTS_MARKED = set()
LINUX_SERVER_HOSTS_ATTEMPTS = {}

LINUX_SERVER_PROVIDER_KEYS = [k for k in json.loads(os.getenv("LINUX_SERVER_PROVIDER_KEY", "[]")) if k]
LINUX_SERVER_PROVIDER_KEYS_MARKED = set()
LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS = {}

# HTTP status codes that mark a provider key as failed.
# BUG FIX: an unset/empty LINUX_SERVER_ERROR made int("") raise ValueError
# at import time; skip blank entries so the set is simply empty.
LINUX_SERVER_ERRORS = {int(x) for x in os.getenv("LINUX_SERVER_ERROR", "").split(",") if x.strip()}

# UI labels / canned responses, all environment-driven (values may be None
# when a variable is unset — downstream code treats them as opaque strings).
AI_TYPES = {f"AI_TYPE_{i}": os.getenv(f"AI_TYPE_{i}") for i in range(1, 8)}
RESPONSES = {f"RESPONSE_{i}": os.getenv(f"RESPONSE_{i}") for i in range(1, 10)}

# display-name <-> model-key mapping and per-model request configuration.
MODEL_MAPPING = json.loads(os.getenv("MODEL_MAPPING", "{}"))
MODEL_CONFIG = json.loads(os.getenv("MODEL_CONFIG", "{}"))
MODEL_CHOICES = list(MODEL_MAPPING.values())
DEFAULT_CONFIG = json.loads(os.getenv("DEFAULT_CONFIG", "{}"))
# First mapping key is the "default" model (dicts preserve insertion order).
DEFAULT_MODEL_KEY = list(MODEL_MAPPING.keys())[0] if MODEL_MAPPING else None

META_TAGS = os.getenv("META_TAGS")

# Last (host, key) pair that answered successfully; tried first next time.
ACTIVE_CANDIDATE = None
|
| 56 |
|
| 57 |
+
class SessionWithID(requests.Session):
    """requests.Session subclass that carries a unique per-user session id."""

    def __init__(self):
        super().__init__()
        # Random UUID4 string identifying this chat session to the backend.
        self.session_id = str(uuid.uuid4())
|
| 61 |
+
|
| 62 |
+
def create_session():
    """Build a fresh HTTP session tagged with a unique session id."""
    session = SessionWithID()
    return session
|
| 64 |
+
|
| 65 |
def get_available_items(items, marked):
    """Return the items not present in `marked`, in random order.

    The input list is never mutated; a shuffled copy of the eligible
    items is returned.
    """
    pool = []
    for item in items:
        if item not in marked:
            pool.append(item)
    random.shuffle(pool)
    return pool
|
| 69 |
|
| 70 |
def marked_item(item, marked, attempts):
    """Record a failure for a host/key; after 3 strikes schedule a reset.

    The item is added to `marked` (excluded by get_available_items) and its
    failure count incremented. On the third failure a 5-minute timer clears
    both the mark and the count so the item gets retried later.

    NOTE(review): items with 1-2 strikes stay marked until a third strike
    arms the timer — confirm that is the intended ban policy.
    """
    marked.add(item)
    attempts[item] = attempts.get(item, 0) + 1
    if attempts[item] >= 3:
        def remove():
            marked.discard(item)
            attempts.pop(item, None)
        timer = threading.Timer(300, remove)
        # BUG FIX: a non-daemon Timer thread keeps the interpreter alive for
        # up to 5 minutes after shutdown; daemonize so exit is not blocked.
        timer.daemon = True
        timer.start()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
+
def get_model_key(display):
    """Translate a dropdown display label back to its MODEL_MAPPING key.

    Falls back to DEFAULT_MODEL_KEY when the label is unknown.
    """
    for key, label in MODEL_MAPPING.items():
        if label == display:
            return key
    return DEFAULT_MODEL_KEY
|
| 81 |
|
| 82 |
+
def extract_file_content(fp):
    """Best-effort text extraction from an uploaded file.

    Handles PDF (pdfplumber), Word (python-docx), Excel (pandas) and
    PowerPoint (python-pptx); any other extension is read as UTF-8 text.
    Errors never propagate: on failure the returned string is the path
    plus the error message, so the chat still gets *something*.
    """
    ext = Path(fp).suffix.lower()
    parts = []
    try:
        if ext == ".pdf":
            with pdfplumber.open(fp) as pdf:
                for page in pdf.pages:
                    # extract_text() returns None for image-only pages.
                    parts.append((page.extract_text() or "") + "\n")
        elif ext in (".doc", ".docx"):
            # NOTE(review): python-docx only parses .docx; a legacy .doc
            # raises and falls through to the except branch below.
            for para in docx.Document(fp).paragraphs:
                parts.append(para.text + "\n")
        elif ext in (".xlsx", ".xls"):
            # Only the first sheet is read (pandas default).
            parts.append(pd.read_excel(fp).to_csv(index=False))
        elif ext in (".ppt", ".pptx"):
            prs = Presentation(fp)
            for slide in prs.slides:
                for shape in slide.shapes:
                    if hasattr(shape, "text") and shape.text:
                        parts.append(shape.text + "\n")
        else:
            parts = [Path(fp).read_text(encoding="utf-8")]
        # PERF: join once instead of quadratic "+=" on large documents.
        content = "".join(parts)
    except Exception as e:
        # Best-effort contract: surface the failure as content, never raise.
        content = f"{fp}: {e}"
    return content.strip()
|
| 109 |
|
| 110 |
+
async def fetch_response_async(host, key, model, msgs, cfg, sid, timeouts=(60, 80, 120, 240)):
    """POST the chat payload to one host, retrying with growing timeouts.

    Returns the assistant message content string, or None when this
    (host, key) pair failed — callers then try the next candidate.
    A status code listed in LINUX_SERVER_ERRORS marks the key as bad.

    `timeouts` generalizes the previously hard-coded retry ladder while
    keeping the original defaults.
    """
    for timeout in timeouts:
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                r = await client.post(
                    host,
                    json={"model": model, "messages": msgs, **cfg, "session_id": sid},
                    headers={"Authorization": f"Bearer {key}"},
                )
                if r.status_code in LINUX_SERVER_ERRORS:
                    # Provider rejected the key outright — no point retrying.
                    marked_item(key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
                    return None
                r.raise_for_status()
                j = r.json()
                if isinstance(j, dict) and j.get("choices"):
                    choice = j["choices"][0]
                    if choice.get("message") and isinstance(choice["message"].get("content"), str):
                        return choice["message"]["content"]
                return None
        except Exception:
            # BUG FIX: the bare `except:` here also swallowed SystemExit and
            # KeyboardInterrupt; only genuine request errors should retry.
            continue
    # Every timeout tier failed — count a strike against this key.
    marked_item(key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
    return None
|
| 129 |
|
| 130 |
+
async def chat_with_model_async(history, user_input, model_display, sess, custom_prompt):
    """Select a (host, key) candidate and fetch one completion for the chat.

    history: list of [user, assistant] pairs shown so far.
    Returns the assistant reply, or a canned RESPONSES message when no
    provider is configured (RESPONSE_3) or every candidate failed
    (RESPONSE_2).
    """
    # BUG FIX: hosts were filtered against LINUX_SERVER_HOSTS_ATTEMPTS (the
    # failure-count dict) instead of LINUX_SERVER_HOSTS_MARKED, which
    # excluded any host after a single recorded failure.
    if not get_available_items(LINUX_SERVER_PROVIDER_KEYS, LINUX_SERVER_PROVIDER_KEYS_MARKED) or not get_available_items(LINUX_SERVER_HOSTS, LINUX_SERVER_HOSTS_MARKED):
        return RESPONSES["RESPONSE_3"]
    if not hasattr(sess, "session_id"):
        sess.session_id = str(uuid.uuid4())
    model_key = get_model_key(model_display)
    cfg = MODEL_CONFIG.get(model_key, DEFAULT_CONFIG)
    # Guard: if the caller already appended the pending [user_input, ""]
    # pair, drop it — user_input is appended explicitly below.
    convo = history[:-1] if history and history[-1][0] == user_input and not history[-1][1] else history
    # BUG FIX: messages must keep conversation order; the old code emitted
    # every user turn followed by every assistant turn.
    msgs = []
    for u, a in convo:
        msgs.append({"role": "user", "content": u})
        if a:
            msgs.append({"role": "assistant", "content": a})
    if model_key == DEFAULT_MODEL_KEY and INTERNAL_TRAINING_DATA:
        prompt = INTERNAL_TRAINING_DATA
    else:
        prompt = custom_prompt or SYSTEM_PROMPT_MAPPING.get(model_key, SYSTEM_PROMPT_DEFAULT)
    msgs.insert(0, {"role": "system", "content": prompt})
    msgs.append({"role": "user", "content": user_input})
    global ACTIVE_CANDIDATE
    if ACTIVE_CANDIDATE:
        # Try the last known-good pair first to avoid re-probing.
        res = await fetch_response_async(ACTIVE_CANDIDATE[0], ACTIVE_CANDIDATE[1], model_key, msgs, cfg, sess.session_id)
        if res:
            return res
        ACTIVE_CANDIDATE = None
    keys = get_available_items(LINUX_SERVER_PROVIDER_KEYS, LINUX_SERVER_PROVIDER_KEYS_MARKED)
    hosts = get_available_items(LINUX_SERVER_HOSTS, LINUX_SERVER_HOSTS_MARKED)
    cands = [(h, k) for h in hosts for k in keys]
    random.shuffle(cands)
    for h, k in cands:
        res = await fetch_response_async(h, k, model_key, msgs, cfg, sess.session_id)
        if res:
            ACTIVE_CANDIDATE = (h, k)
            return res
    return RESPONSES["RESPONSE_2"]
|
| 160 |
|
| 161 |
+
async def respond_async(multi, history, model_display, sess, custom_prompt):
    """Gradio submit handler: ingest text/files, stream the reply.

    Yields (history, cleared textbox, session) tuples so the chat UI
    updates as the response streams in character by character.
    """
    msg = {"text": multi.get("text", "").strip(), "files": multi.get("files", [])}
    if not msg["text"] and not msg["files"]:
        # Nothing to send — just reset the input box.
        yield history, gr.MultimodalTextbox(value=None, interactive=True), sess
        return
    inp = ""
    for f in msg["files"]:
        # Gradio may deliver files as dicts ({"name": path}) or bare paths.
        p = f["name"] if isinstance(f, dict) and "name" in f else f
        inp += f"{Path(p).name}\n\n{extract_file_content(p)}\n\n"
    if msg["text"]:
        inp += msg["text"]
    history.append([inp, ""])
    # BUG FIX: pass history *without* the just-appended pair — the model
    # call appends user_input itself, so including it duplicated the turn.
    ai = await chat_with_model_async(history[:-1], inp, model_display, sess, custom_prompt)
    history[-1][1] = ""

    def to_str(d):
        # Defensive normalization: the reply should be a string, but coerce
        # other JSON-ish shapes into text rather than crash the stream.
        if isinstance(d, (str, int, float)):
            return str(d)
        if isinstance(d, bytes):
            return d.decode("utf-8", errors="ignore")
        if isinstance(d, (list, tuple)):
            return "".join(map(to_str, d))
        if isinstance(d, dict):
            return json.dumps(d, ensure_ascii=False)
        return repr(d)

    for c in ai:
        history[-1][1] += to_str(c)
        # Tiny sleep yields control to the event loop between characters.
        await asyncio.sleep(0.0001)
        yield history, gr.MultimodalTextbox(value=None, interactive=True), sess
|
| 185 |
|
| 186 |
+
def change_model(new):
    """Reset chat state after the user picks a different model.

    Returns fresh history, a new session, the selection itself, that
    model's default system prompt, and a textbox update (hidden for the
    default model, visible otherwise).
    """
    show_prompt_box = new != MODEL_CHOICES[0]
    model_default = SYSTEM_PROMPT_MAPPING.get(get_model_key(new), SYSTEM_PROMPT_DEFAULT)
    textbox_update = gr.update(value=model_default, visible=show_prompt_box)
    return [], create_session(), new, model_default, textbox_update
|
| 190 |
|
| 191 |
# Gradio UI wiring: one Blocks app exposing the chatbot, a model picker,
# and an optional custom system-prompt box.
with gr.Blocks(fill_height=True, fill_width=True, title=AI_TYPES["AI_TYPE_4"], head=META_TAGS) as jarvis:
    user_history = gr.State([])  # list of [user, assistant] pairs
    user_session = gr.State(create_session())  # per-visitor HTTP session with id
    selected_model = gr.State(MODEL_CHOICES[0] if MODEL_CHOICES else "")
    custom_prompt_state = gr.State("")  # user override of the system prompt ("" = default)
    chatbot = gr.Chatbot(label=AI_TYPES["AI_TYPE_1"], show_copy_button=True, scale=1, elem_id=AI_TYPES["AI_TYPE_2"])
    with gr.Row():
        msg = gr.MultimodalTextbox(show_label=False, placeholder=RESPONSES["RESPONSE_5"], interactive=True, file_count="single", file_types=ALLOWED_EXTENSIONS)
    with gr.Accordion(AI_TYPES["AI_TYPE_6"], open=False):
        model_dropdown = gr.Dropdown(show_label=False, choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
        system_prompt = gr.Textbox(label=AI_TYPES["AI_TYPE_7"], lines=2, interactive=True, visible=False)
    # Switching models resets history/session and toggles the prompt box
    # (change_model hides it for the default model).
    model_dropdown.change(fn=change_model, inputs=[model_dropdown], outputs=[user_history, user_session, selected_model, custom_prompt_state, system_prompt])
    # Mirror manual edits of the prompt textbox into the state object.
    system_prompt.change(fn=lambda x: x, inputs=[system_prompt], outputs=[custom_prompt_state])
    # Submitting the textbox streams the reply via respond_async.
    msg.submit(fn=respond_async, inputs=[msg, user_history, selected_model, user_session, custom_prompt_state], outputs=[chatbot, msg, user_session], api_name=INTERNAL_AI_GET_SERVER)
jarvis.launch(max_file_size="1mb")
|