import logging

logging.basicConfig(level=logging.INFO, format='👉 [%(asctime)s][%(name)s][%(levelname)s] - %(message)s')
logger = logging.getLogger(__name__)

import gradio as gr
from gradio import ChatMessage
import json
from openai import OpenAI
from datetime import datetime
import os
import re

from omegaconf import OmegaConf
from dotenv import load_dotenv
from tools import tools, oitools
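
# Assumptions (not verified against tools.py): `tools` maps tool names to
# objects exposing a LangChain-style .invoke(input=dict) method (see
# llm_in_loop below), and `oitools` holds the matching OpenAI function-calling
# schemas. A hypothetical single-tool `oitools` entry:
#
#   [{"type": "function",
#     "function": {"name": "get_weather",
#                  "description": "Look up the current weather for a city.",
#                  "parameters": {"type": "object",
#                                 "properties": {"city": {"type": "string"}},
#                                 "required": ["city"]}}}]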

load_dotenv(".env", override=True)

# Expected in .env: HF_TOKEN (API key), BASE_URL (an OpenAI-compatible
# endpoint), and EMBEDDINGS_MODEL.
HF_TOKEN = os.environ.get("HF_TOKEN")
BASE_URL = os.environ.get("BASE_URL")
EMBEDDINGS = os.environ.get("EMBEDDINGS_MODEL")

config_file = os.path.join(os.path.dirname(__file__), "config/config.yaml")
cfg = OmegaConf.load(config_file)
SYSTEM_PROMPT_TEMPLATE = cfg.system_prompt
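
# config/config.yaml is expected to define at least a `system_prompt` key; the
# prompt may contain a `{date}` placeholder, which completion() fills in via
# str.format. A minimal, hypothetical config:
#
#   system_prompt: |
#     You are a helpful assistant. Today is {date}.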

try:
    logger.info("Initializing OpenAI client...")
    logger.info(f"BASE_URL: {BASE_URL[:15]}...")
    # Log only a masked token to avoid leaking the secret.
    logger.info(f"HF_TOKEN: {HF_TOKEN[:6]}{'*' * (len(HF_TOKEN) - 6)}")
    client = OpenAI(base_url=BASE_URL, api_key=HF_TOKEN)
    logger.info(f"Client initialized: {client}")
except Exception as e:
    logger.error(f"Error initializing OpenAI client: {e}")
    raise


def today_date():
    """Return the current date and time, e.g. 'Monday, January 01, 2024, 09:30 AM'."""
    return datetime.today().strftime('%A, %B %d, %Y, %I:%M %p')


def clean_json_string(json_str):
    """Strip trailing whitespace, commas, and braces from a (possibly truncated) JSON object string, then re-close it."""
    return re.sub(r'[ ,}\s]+$', '', json_str) + '}'
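
# Illustrative behaviour (hypothetical inputs): the regex strips any trailing
# run of spaces, commas, and braces, then re-closes the object, so a tool-call
# argument string truncated mid-stream still parses:
#
#   clean_json_string('{"city": "Paris", ')  ->  '{"city": "Paris"}'
#   clean_json_string('{"city": "Paris"}')   ->  '{"city": "Paris"}'
#
# Note this assumes a flat, single-level JSON object; nested objects lose
# their inner closing braces.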


def __DEPRE__get_summary(model, text):
    """Deprecated: generate a detailed, faithful summary of the given text."""
    messages = [{"role": "system", "content": """You are an AI assistant that generates **detailed and complete summaries** of user-provided text. Your task is to produce a **faithful summary** that preserves **all key information**, facts, and relevant points from the original content.

### Summary Guidelines:

- **No Detail Skipping**: Do **not** omit or simplify important content. Every critical fact, event, name, number, and nuance must be included.
- **Structured Clarity**: Organize the summary clearly and logically. If the original has sections or topics, reflect that structure.
- **No Personal Input**: Do **not** add opinions, interpretations, or external knowledge. Stay 100% faithful to the source text.
- **Conciseness with Completeness**: Be as concise as possible **without losing any important detail**.

Only produce the summary after fully reading and understanding the input text.
"""}]
    messages.append({"role": "user", "content": f"**TEXT**:\n\n{text}"})

    request_params = {
        "model": model,
        "messages": messages,
        "stream": False,
        "max_tokens": 1000,
        "temperature": 0.1,
    }

    return client.chat.completions.create(**request_params)


def completion(history, model, system_prompt: str, tools=None):
    """Build an OpenAI-style message list from the chat history and request a streaming completion."""
    messages = [{"role": "system", "content": system_prompt.format(date=today_date())}]
    for msg in history:
        if isinstance(msg, dict):
            msg = ChatMessage(**msg)
        if msg.role == "assistant" and hasattr(msg, "metadata") and msg.metadata:
            # Tool results are stored on the ChatMessage with the serialized
            # tool call in metadata["title"]; replay them as an assistant
            # tool-call message followed by the corresponding tool message.
            tools_calls = json.loads(msg.metadata.get("title", "[]"))
            messages.append({"role": "assistant", "tool_calls": tools_calls, "content": ""})
            messages.append({"role": "tool", "content": msg.content})
        else:
            messages.append({"role": msg.role, "content": msg.content})

    request_params = {
        "model": model,
        "messages": messages,
        "stream": True,
        "max_tokens": 1000,
        "temperature": 0.1,
        "extra_body": {},
    }
    if tools:
        request_params.update({"tool_choice": "auto", "tools": tools})

    return client.chat.completions.create(**request_params)
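
# Illustrative only (tool name and values are hypothetical): for a history
# containing one completed tool round-trip, the message list sent to the API
# looks roughly like:
#
#   [{"role": "system", "content": "<formatted system prompt>"},
#    {"role": "user", "content": "What's the weather in Paris?"},
#    {"role": "assistant", "content": "",
#     "tool_calls": [{"id": "call_id", "type": "function",
#                     "function": {"name": "get_weather",
#                                  "arguments": "{\"city\": \"Paris\"}"}}]},
#    {"role": "tool", "content": "Sunny, 21°C"},
#    {"role": "assistant", "content": "It's sunny and 21°C in Paris."}]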


def llm_in_loop(history, system_prompt, recursive):
    """Handle the LLM interaction loop, invoking tools as needed until a final response is generated.

    Args:
        history (list): The chat history between the user and the assistant.
        system_prompt (str): The system prompt guiding the LLM's behavior.
        recursive (int): Negative slice index marking where this turn's new messages start in `history`; decremented on each recursive tool round.

    Yields:
        list: The updated tail of the chat history after each streamed chunk or tool invocation.
    """
    try:
        models = client.models.list()
        model = models.data[0].id
    except Exception as err:
        gr.Warning("The model is initializing. Please wait; this may take 5 to 10 minutes ⏳.", duration=20)
        raise err

    # Accumulators for a streamed tool call: the name arrives once, the JSON
    # arguments arrive incrementally across chunks.
    arguments = ""
    name = ""
    chat_completion = completion(history=history, tools=oitools, model=model, system_prompt=system_prompt)
    appended = False

    for chunk in chat_completion:
        if chunk.choices and chunk.choices[0].delta.tool_calls:
            call = chunk.choices[0].delta.tool_calls[0]
            if hasattr(call.function, "name") and call.function.name:
                name = call.function.name
            if hasattr(call.function, "arguments") and call.function.arguments:
                arguments += call.function.arguments
        elif chunk.choices and chunk.choices[0].delta.content:
            if not appended:
                history.append(ChatMessage(role="assistant", content=""))
                appended = True
            history[-1].content += chunk.choices[0].delta.content
            yield history[recursive:]

    # Streamed arguments may be cut off mid-object; repair them before parsing.
    arguments = clean_json_string(arguments) if arguments else "{}"
    logger.info(f"Tool call: {name} {arguments}")
    arguments = json.loads(arguments)

    if appended:
        recursive -= 1

    if name:
        try:
            result = str(tools[name].invoke(input=arguments))
        except Exception as err:
            result = f"💥 Error: {err}"
        # Record the tool result, stashing the serialized tool call in
        # metadata["title"] so completion() can replay it on later turns.
        history.append(ChatMessage(role="assistant", content=result, metadata={"title": json.dumps([{"id": "call_id", "function": {"arguments": json.dumps(arguments, ensure_ascii=False), "name": name}, "type": "function"}], ensure_ascii=False)}))
        yield history[recursive:]
        yield from llm_in_loop(history, system_prompt, recursive - 1)
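
# Illustrative walk-through: respond() enters with recursive=-1, so
# history[recursive:] yields just the newest message. Each time the model
# streams plain content, `recursive` is decremented once; each tool round
# recurses with recursive - 1, so the slice widens to keep every message
# appended during this turn (streamed text plus tool results) visible in the UI.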


def respond(message, history, additional_inputs):
    """Gradio ChatInterface callback: append the user turn and stream back the assistant's reply."""
    history.append(ChatMessage(role="user", content=message))
    yield from llm_in_loop(history, additional_inputs, -1)


if __name__ == "__main__":
    system_prompt = gr.Textbox(label="System prompt", value=SYSTEM_PROMPT_TEMPLATE, lines=3)
    demo = gr.ChatInterface(respond, type="messages", additional_inputs=[system_prompt])
    demo.launch()