import logging

logging.basicConfig(level=logging.INFO, format='👉 [%(asctime)s][%(name)s][%(levelname)s] - %(message)s')
logger = logging.getLogger(__name__)

import gradio as gr
from gradio import ChatMessage
import json
from openai import OpenAI
from datetime import datetime
import os
import re

from omegaconf import OmegaConf
from dotenv import load_dotenv
from tools import tools, oitools
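
# Assumptions (not verified against tools.py): `tools` maps tool names to
# objects exposing a LangChain-style .invoke(input=dict) method (see
# llm_in_loop below), and `oitools` holds the matching OpenAI function-calling
# schemas. A hypothetical single-tool `oitools` entry:
#
#   [{"type": "function",
#     "function": {"name": "get_weather",
#                  "description": "Look up the current weather for a city.",
#                  "parameters": {"type": "object",
#                                 "properties": {"city": {"type": "string"}},
#                                 "required": ["city"]}}}]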

load_dotenv(".env", override=True)

# Expected in .env: HF_TOKEN (API key), BASE_URL (an OpenAI-compatible
# endpoint), and EMBEDDINGS_MODEL.
HF_TOKEN = os.environ.get("HF_TOKEN")
BASE_URL = os.environ.get("BASE_URL")
EMBEDDINGS = os.environ.get("EMBEDDINGS_MODEL")

config_file = os.path.join(os.path.dirname(__file__), "config/config.yaml")
cfg = OmegaConf.load(config_file)
SYSTEM_PROMPT_TEMPLATE = cfg.system_prompt
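
# config/config.yaml is expected to define at least a `system_prompt` key; the
# prompt may contain a `{date}` placeholder, which completion() fills in via
# str.format. A minimal, hypothetical config:
#
#   system_prompt: |
#     You are a helpful assistant. Today is {date}.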

try:
    logger.info("Initializing OpenAI client...")
    logger.info(f"BASE_URL: {BASE_URL[:15]}...")
    # Log only a masked token to avoid leaking the secret.
    logger.info(f"HF_TOKEN: {HF_TOKEN[:6]}{'*' * (len(HF_TOKEN) - 6)}")
    client = OpenAI(base_url=BASE_URL, api_key=HF_TOKEN)
    logger.info(f"Client initialized: {client}")
except Exception as e:
    logger.error(f"Error initializing OpenAI client: {e}")
    raise


def today_date():
    """Return the current date and time, e.g. 'Monday, January 01, 2024, 09:30 AM'."""
    return datetime.today().strftime('%A, %B %d, %Y, %I:%M %p')


def clean_json_string(json_str):
    """Strip trailing whitespace, commas, and braces from a (possibly truncated) JSON object string, then re-close it."""
    return re.sub(r'[ ,}\s]+$', '', json_str) + '}'
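
# Illustrative behaviour (hypothetical inputs): the regex strips any trailing
# run of spaces, commas, and braces, then re-closes the object, so a tool-call
# argument string truncated mid-stream still parses:
#
#   clean_json_string('{"city": "Paris", ')  ->  '{"city": "Paris"}'
#   clean_json_string('{"city": "Paris"}')   ->  '{"city": "Paris"}'
#
# Note this assumes a flat, single-level JSON object; nested objects lose
# their inner closing braces.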


def __DEPRE__get_summary(model, text):
    """Deprecated: generate a detailed, faithful summary of the given text."""
    messages = [{"role": "system", "content": """You are an AI assistant that generates **detailed and complete summaries** of user-provided text. Your task is to produce a **faithful summary** that preserves **all key information**, facts, and relevant points from the original content.

### Summary Guidelines:

- **No Detail Skipping**: Do **not** omit or simplify important content. Every critical fact, event, name, number, and nuance must be included.
- **Structured Clarity**: Organize the summary clearly and logically. If the original has sections or topics, reflect that structure.
- **No Personal Input**: Do **not** add opinions, interpretations, or external knowledge. Stay 100% faithful to the source text.
- **Conciseness with Completeness**: Be as concise as possible **without losing any important detail**.

Only produce the summary after fully reading and understanding the input text.
"""}]
    messages.append({"role": "user", "content": f"**TEXT**:\n\n{text}"})

    request_params = {
        "model": model,
        "messages": messages,
        "stream": False,
        "max_tokens": 1000,
        "temperature": 0.1,
    }

    return client.chat.completions.create(**request_params)


def completion(history, model, system_prompt: str, tools=None):
    """Build an OpenAI-style message list from the chat history and request a streaming completion."""
    messages = [{"role": "system", "content": system_prompt.format(date=today_date())}]
    for msg in history:
        if isinstance(msg, dict):
            msg = ChatMessage(**msg)
        if msg.role == "assistant" and hasattr(msg, "metadata") and msg.metadata:
            # Tool results are stored on the ChatMessage with the serialized
            # tool call in metadata["title"]; replay them as an assistant
            # tool-call message followed by the corresponding tool message.
            tools_calls = json.loads(msg.metadata.get("title", "[]"))
            messages.append({"role": "assistant", "tool_calls": tools_calls, "content": ""})
            messages.append({"role": "tool", "content": msg.content})
        else:
            messages.append({"role": msg.role, "content": msg.content})

    request_params = {
        "model": model,
        "messages": messages,
        "stream": True,
        "max_tokens": 1000,
        "temperature": 0.1,
        "extra_body": {},
    }
    if tools:
        request_params.update({"tool_choice": "auto", "tools": tools})

    return client.chat.completions.create(**request_params)
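
# Illustrative only (tool name and values are hypothetical): for a history
# containing one completed tool round-trip, the message list sent to the API
# looks roughly like:
#
#   [{"role": "system", "content": "<formatted system prompt>"},
#    {"role": "user", "content": "What's the weather in Paris?"},
#    {"role": "assistant", "content": "",
#     "tool_calls": [{"id": "call_id", "type": "function",
#                     "function": {"name": "get_weather",
#                                  "arguments": "{\"city\": \"Paris\"}"}}]},
#    {"role": "tool", "content": "Sunny, 21°C"},
#    {"role": "assistant", "content": "It's sunny and 21°C in Paris."}]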


def llm_in_loop(history, system_prompt, recursive):
    """Handle the LLM interaction loop, invoking tools as needed until a final response is generated.

    Args:
        history (list): The chat history between the user and the assistant.
        system_prompt (str): The system prompt guiding the LLM's behavior.
        recursive (int): Negative slice index marking where this turn's new messages start in `history`; decremented on each recursive tool round.

    Yields:
        list: The updated tail of the chat history after each streamed chunk or tool invocation.
    """
    try:
        models = client.models.list()
        model = models.data[0].id
    except Exception as err:
        gr.Warning("The model is initializing. Please wait; this may take 5 to 10 minutes ⏳.", duration=20)
        raise err

    # Accumulators for a streamed tool call: the name arrives once, the JSON
    # arguments arrive incrementally across chunks.
    arguments = ""
    name = ""
    chat_completion = completion(history=history, tools=oitools, model=model, system_prompt=system_prompt)
    appended = False

    for chunk in chat_completion:
        if chunk.choices and chunk.choices[0].delta.tool_calls:
            call = chunk.choices[0].delta.tool_calls[0]
            if hasattr(call.function, "name") and call.function.name:
                name = call.function.name
            if hasattr(call.function, "arguments") and call.function.arguments:
                arguments += call.function.arguments
        elif chunk.choices and chunk.choices[0].delta.content:
            if not appended:
                history.append(ChatMessage(role="assistant", content=""))
                appended = True
            history[-1].content += chunk.choices[0].delta.content
            yield history[recursive:]

    # Streamed arguments may be cut off mid-object; repair them before parsing.
    arguments = clean_json_string(arguments) if arguments else "{}"
    logger.info(f"Tool call: {name} {arguments}")
    arguments = json.loads(arguments)

    if appended:
        recursive -= 1

    if name:
        try:
            result = str(tools[name].invoke(input=arguments))
        except Exception as err:
            result = f"💥 Error: {err}"
        # Record the tool result, stashing the serialized tool call in
        # metadata["title"] so completion() can replay it on later turns.
        history.append(ChatMessage(role="assistant", content=result, metadata={"title": json.dumps([{"id": "call_id", "function": {"arguments": json.dumps(arguments, ensure_ascii=False), "name": name}, "type": "function"}], ensure_ascii=False)}))
        yield history[recursive:]
        yield from llm_in_loop(history, system_prompt, recursive - 1)
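
# Illustrative walk-through: respond() enters with recursive=-1, so
# history[recursive:] yields just the newest message. Each time the model
# streams plain content, `recursive` is decremented once; each tool round
# recurses with recursive - 1, so the slice widens to keep every message
# appended during this turn (streamed text plus tool results) visible in the UI.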


def respond(message, history, additional_inputs):
    """Gradio ChatInterface callback: append the user turn and stream back the assistant's reply."""
    history.append(ChatMessage(role="user", content=message))
    yield from llm_in_loop(history, additional_inputs, -1)


if __name__ == "__main__":
    system_prompt = gr.Textbox(label="System prompt", value=SYSTEM_PROMPT_TEMPLATE, lines=3)
    demo = gr.ChatInterface(respond, type="messages", additional_inputs=[system_prompt])
    demo.launch()