diff --git a/.env b/.env
new file mode 100644
index 0000000000000000000000000000000000000000..d0a638549836aa2b5e5f404d5c130d4ad1b2d535
--- /dev/null
+++ b/.env
@@ -0,0 +1,86 @@
+# Thread Configuration - 线程配置
+# 生成时采用的最大线程数，5-10即可。会带来成倍的API调用费用，不要设置过高！
+MAX_THREAD_NUM=1
+
+
+# Server Configuration - Docker服务配置
+# 前端服务端口
+FRONTEND_PORT=80
+# 后端服务端口
+BACKEND_PORT=7869
+# 后端服务监听地址
+BACKEND_HOST=0.0.0.0
+# Gunicorn工作进程数
+WORKERS=4
+# 每个工作进程的线程数
+THREADS=2
+# 请求超时时间（秒）
+TIMEOUT=120
+
+# 是否启用在线演示
+# 不用设置，默认不启用
+ENABLE_ONLINE_DEMO=False
+
+# Backend Configuration - 后端配置
+# 导入小说时，最大的处理长度，超出该长度的文本不会进行处理，可以考虑增加
+MAX_NOVEL_SUMMARY_LENGTH=20000
+
+
+# MongoDB Configuration - MongoDB数据库配置
+# 安装了MongoDB才需要配置，否则不用改动
+# 是否启用MongoDB，启用后下面配置才有效
+ENABLE_MONGODB=false
+# MongoDB连接地址，使用host.docker.internal访问宿主机MongoDB
+MONGODB_URI=mongodb://host.docker.internal:27017/
+# MongoDB数据库名称
+MONGODB_DB_NAME=llm_api
+# 是否启用API缓存
+ENABLE_MONGODB_CACHE=true
+# 缓存命中后重放速度倍率
+CACHE_REPLAY_SPEED=2
+# 缓存命中后最大延迟时间（秒）
+CACHE_REPLAY_MAX_DELAY=5
+
+
+# API Cost Limits - API费用限制设置，需要依赖于MongoDB
+# 每小时费用上限（人民币）
+API_HOURLY_LIMIT_RMB=100
+# 每天费用上限（人民币）
+API_DAILY_LIMIT_RMB=500
+# 美元兑人民币汇率
+API_USD_TO_RMB_RATE=7
+
+
+# Wenxin API Settings - 文心API配置
+# 文心API的AK，获取地址：https://console.bce.baidu.com/qianfan/ais/console/applicationConsole/application
+WENXIN_AK=
+WENXIN_SK=
+WENXIN_AVAILABLE_MODELS=ERNIE-Novel-8K,ERNIE-4.0-8K,ERNIE-3.5-8K
+
+# Doubao API Settings - 豆包API配置
+# DOUBAO_ENDPOINT_IDS和DOUBAO_AVAILABLE_MODELS一一对应，有几个模型就对应几个endpoint_id，这是豆包强制要求的
+# 你可以自行设置DOUBAO_AVAILABLE_MODELS，不一定非要采用下面的
+DOUBAO_API_KEY=
+DOUBAO_ENDPOINT_IDS=
+DOUBAO_AVAILABLE_MODELS=doubao-pro-32k,doubao-lite-32k
+
+# GPT API Settings - GPT API配置
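+# GPT_BASE_URL, GPT_API_KEY and GPT_PROXIES are not set in this file; config.py reads them from the environment ('' when absent).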
+GPT_AVAILABLE_MODELS=deepseek-r1,gemini-2.0-pro-exp-02-05,lmsys/claude-3-5-sonnet-20241022,windsurf/claude-3-5-sonnet,claude-3-5-sonnet-20240620,o3-mini,gpt-4-turbo-2024-04-09,gemini-2.0-flash-thinking-exp
+
+# Local Model Settings - 本地模型配置
+# 本地模型配置需要把下面的localhost替换为host.docker.internal，把8000替换为你的本地大模型服务端口
+# 把local-key替换为你的本地大模型服务API_KEY，把local-model-1替换为你的本地大模型服务模型名
+# 并且docker启动方式有变化，详细参考readme
+LOCAL_BASE_URL=http://localhost:8000/v1
+LOCAL_API_KEY=local-key
+LOCAL_AVAILABLE_MODELS=local-model-1
+
+# Zhipuai API Settings - 智谱AI配置
+ZHIPUAI_API_KEY=
+ZHIPUAI_AVAILABLE_MODELS=glm-4-air,glm-4-flashx
+
+# Default Model Settings - 默认模型设置
+# 例如：wenxin/ERNIE-Novel-8K, doubao/doubao-pro-32k, gpt/gpt-4o-mini, local/local-model-1
+DEFAULT_MAIN_MODEL=gpt/claude-3-5-sonnet-20240620
+DEFAULT_SUB_MODEL=gpt/gemini-2.0-pro-exp-02-05
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..0210528eddce432702c36d1f7c91e781435a8ac6
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,8 @@
+FROM python:3.12-slim
+# Work from /app; the COPY below places the build context there.
+WORKDIR /app
+# Copy the whole build context, then install the dependencies listed in requirements.txt.
+COPY . .
+RUN pip install -r requirements.txt
+# Exec form takes one array element per argument; a single "python app.py" element is looked up as one executable name.
+CMD ["python", "app.py"]
\ No newline at end of file
diff --git a/README.md b/README.md
index ffd52203c0e10d39daee203ebe3865f39cb66e00..6bed1d85b16f98c907bd552a7d808288d16b27a2 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,11 @@
 ---
-title: Long
-emoji: 💻
-colorFrom: blue
-colorTo: gray
+title: long
+emoji: 👀
+colorFrom: purple
+colorTo: yellow
 sdk: docker
 pinned: false
+license: gpl-3.0
+app_port: 7869
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..45552fee635a82bb66f394c826e5a4ee47b982df
--- /dev/null
+++ b/app.py
@@ -0,0 +1,341 @@
+import json
+import time
+
+from flask import Flask, request, Response, jsonify
+from flask_cors import CORS
+app = Flask(__name__)
+CORS(app)
+
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from prompts.baseprompt import clean_txt_content, load_prompt
+
+from core.writer_utils import KeyPointMsg
+from core.draft_writer import DraftWriter
+from core.plot_writer import PlotWriter
+from core.outline_writer import OutlineWriter
+
+from setting import setting_bp
+from summary import process_novel
+from backend_utils import get_model_config_from_provider_model
+from config import MAX_NOVEL_SUMMARY_LENGTH, MAX_THREAD_NUM, ENABLE_ONLINE_DEMO
+
+
+app.register_blueprint(setting_bp)
+
+# Bind address and port passed to app.run below; both can be overridden through the environment.
+BACKEND_HOST = os.environ.get('BACKEND_HOST', '0.0.0.0')
+BACKEND_PORT = int(os.environ.get('BACKEND_PORT', 7869))
+
+
+@app.route('/health', methods=['GET'])
+def health_check():
+    """Liveness probe: reply 200 with a fixed status and the current Unix time in whole seconds."""
+    now = int(time.time())
+    payload = {'status': 'healthy', 'timestamp': now}
+    return jsonify(payload), 200
+
+
+def load_novel_writer(writer_mode, chunk_list, global_context, x_chunk_length, y_chunk_length, main_model, sub_model, max_thread_num) -> DraftWriter:
+    """Build the writer object that corresponds to ``writer_mode``.
+
+    'draft' -> DraftWriter with an empty global context, 'outline' -> OutlineWriter
+    with the context under 'summary', 'plot' -> PlotWriter with it under 'chapter'.
+    Raises ValueError for any other mode.
+    """
+    shared = dict(
+        xy_pairs=chunk_list,
+        model=get_model_config_from_provider_model(main_model),
+        sub_model=get_model_config_from_provider_model(sub_model),
+        x_chunk_length=x_chunk_length,
+        y_chunk_length=y_chunk_length,
+        max_thread_num=max_thread_num,
+    )
+    match writer_mode:
+        case 'draft':
+            return DraftWriter(global_context={}, **shared)
+        case 'outline':
+            return OutlineWriter(global_context={'summary': global_context}, **shared)
+        case 'plot':
+            return PlotWriter(global_context={'chapter': global_context}, **shared)
+        case _:
+            raise ValueError(f"unknown writer: {writer_mode}")
+
+
+
+
+
+# Display names of the built-in prompts, keyed by writer type.
+prompt_names = {
+    'outline': ['新建章节', '扩写章节', '润色章节'],
+    'plot': ['新建剧情', '扩写剧情', '润色剧情'],
+    'draft': ['新建正文', '扩写正文', '润色正文'],
+}
+
+# Directory that holds the prompt files of each writer type.
+prompt_dirname = {
+    'outline': 'prompts/创作章节',
+    'plot': 'prompts/创作剧情',
+    'draft': 'prompts/创作正文',
+}
+
+# PROMPTS[type] = {'prompt_names': [...], <name>: {'content': <prompt text without a leading "user:" line>}, ...}
+PROMPTS = {}
+for type_name, dirname in prompt_dirname.items():
+    PROMPTS[type_name] = {'prompt_names': prompt_names[type_name]}
+    for name in prompt_names[type_name]:
+        content = clean_txt_content(load_prompt(dirname, name)).removeprefix("user:\n")
+        PROMPTS[type_name][name] = {'content': content}
+
+
+@app.route('/prompts', methods=['GET'])
+def get_prompts():
+    return jsonify(PROMPTS)  # JSON copy of the module-level PROMPTS table
+
+def get_delta_chunks(prev_chunks, curr_chunks):
+    """Diff two snapshots of the chunk table for incremental streaming.
+
+    Args:
+        prev_chunks: rows of strings from the previous snapshot (may be None or empty).
+        curr_chunks: rows of strings for the current snapshot.
+
+    Returns:
+        ("delta", rows) holding only the text appended to every cell when both
+        snapshots have the same shape and each current cell starts with its
+        previous value; otherwise ("init", curr_chunks).
+    """
+    if not prev_chunks or len(prev_chunks) != len(curr_chunks):
+        return "init", curr_chunks
+
+    def extends(old_row, new_row):
+        # Same number of cells, and every cell only grew at its end.
+        return len(old_row) == len(new_row) and all(
+            new.startswith(old) for old, new in zip(old_row, new_row)
+        )
+
+    row_pairs = list(zip(prev_chunks, curr_chunks))
+    if not all(extends(old_row, new_row) for old_row, new_row in row_pairs):
+        return "init", curr_chunks
+
+    # Keep just the suffix that was appended to each cell.
+    appended = [
+        [new[len(old):] for old, new in zip(old_row, new_row)]
+        for old_row, new_row in row_pairs
+    ]
+    return "delta", appended
+
+
+def call_write(writer_mode, chunk_list, global_context, chunk_span, prompt_content, x_chunk_length, y_chunk_length, main_model, sub_model, max_thread_num, only_prompt):
+    """Run one writing request and yield JSON-serialisable progress dicts.
+
+    Normally yields {'done', 'chunk_type', 'chunk_list', 'msg'} dicts (built by delta_wrapper),
+    at most one every 0.2 s while generating and a final one with done=True. When only_prompt
+    is truthy, yields a single {'prompts': [...]} dict after the first batch and stops.
+    Raises Exception if the online demo is enabled and max_thread_num exceeds MAX_THREAD_NUM.
+    """
+    if ENABLE_ONLINE_DEMO and max_thread_num > MAX_THREAD_NUM:
+        raise Exception("在线Demo模型下，最大线程数不能超过" + str(MAX_THREAD_NUM) + "！")
+
+    # The chunks come from separate page cells: re-append a newline to every non-empty cell except those of the last row.
+    chunk_list = [[e.strip() + ('\n' if e.strip() and rowi != len(chunk_list)-1 else '') for e in row] for rowi, row in enumerate(chunk_list)]
+
+    prev_chunks = None
+    def delta_wrapper(chunk_list, done=False, msg=None):
+        # Cells returned to the page are stripped of the surrounding newlines again.
+        chunk_list = [[e.strip() for e in row] for row in chunk_list]
+
+        nonlocal prev_chunks
+        if prev_chunks is None:
+            chunk_type, new_chunks = "init", chunk_list
+        else:
+            chunk_type, new_chunks = get_delta_chunks(prev_chunks, chunk_list)
+        prev_chunks = chunk_list
+        return {
+            "done": done,
+            "chunk_type": chunk_type,
+            "chunk_list": new_chunks,
+            "msg": msg
+        }
+
+    novel_writer = load_novel_writer(writer_mode, chunk_list, global_context, x_chunk_length, y_chunk_length, main_model, sub_model, max_thread_num)
+
+    # A draft needs a mapping, so an initial split is performed first.
+    if writer_mode == 'draft':
+        target_chunk = novel_writer.get_chunk(pair_span=chunk_span)
+        new_target_chunk = novel_writer.map_text_wo_llm(target_chunk)
+        novel_writer.apply_chunks([target_chunk], [new_target_chunk])
+        chunk_span = novel_writer.get_chunk_pair_span(new_target_chunk)
+
+    # Snapshot of the starting state, used after generation to compute the final diff.
+    init_novel_writer = load_novel_writer(writer_mode, list(novel_writer.xy_pairs), global_context, x_chunk_length, y_chunk_length, main_model, sub_model, max_thread_num)
+
+    # TODO: writer.write should work for any prompt whether y is empty or already has content,
+    # i.e. a separate "new draft" prompt may exist, but "expand draft" should achieve the same.
+    generator = novel_writer.write(prompt_content, pair_span=chunk_span)
+
+    prompt_outputs = []
+    last_yield_time = time.time()  # Initialize the last yield time
+
+    prompt_name = ''
+    for kp_msg in generator:
+        if isinstance(kp_msg, KeyPointMsg):
+            # To support saving at key points, an edit-level change would be computed and yielded here.
+            prompt_name = kp_msg.prompt_name
+            continue
+        chunk_list = kp_msg
+
+        current_cost = 0
+        currency_symbol = ''
+        current_model = ''
+        data_chunks = []
+        prompt_outputs.clear()
+        for e in chunk_list:
+            if e is None: continue  # None: this chunk has not been processed yet
+            output, chunk = e
+            if output is None: continue  # None: the chunk returned without yielding, so no LLM call was made
+            prompt_outputs.append(output)
+            current_model = output['response_msgs'].model
+            current_cost += output['response_msgs'].cost
+            currency_symbol = output['response_msgs'].currency_symbol
+            if 'plot2text' in output:
+                current_text = "正在建立映射关系..." + '\n'
+            else:
+                current_text = output['text']
+            data_chunks.append((chunk.x_chunk, chunk.y_chunk, current_text))
+
+        if only_prompt:
+            yield {'prompts': [e['response_msgs'] for e in prompt_outputs]}
+            return
+
+        current_time = time.time()
+        if current_time - last_yield_time >= 0.2:  # Check if 0.2 seconds have passed
+            # Parenthesised so that only the model/cost suffix is dropped when no model is known yet;
+            # without the parentheses the conditional replaced the whole message with ''.
+            yield delta_wrapper(data_chunks, done=False, msg=f"正在 {prompt_name} （{len(prompt_outputs)} / {len(chunk_list)}）" + (f" 模型：{current_model} 花费：{current_cost:.5f}{currency_symbol}" if current_model else ''))
+            last_yield_time = current_time  # Update the last yield time
+
+    # Compute an edit-level diff for the front end to display; diffing is a display concern and is meant to move out of the writer later.
+    data_chunks = init_novel_writer.diff_to(novel_writer, pair_span=chunk_span)
+
+    yield delta_wrapper(data_chunks, done=True, msg='创作完成!')
+
+
+@app.route('/write', methods=['POST'])
+def write():
+    """Stream the progress of one writing request as server-sent events.
+
+    The first event carries the 'stream_id' that /stop_stream accepts; every later event is a
+    dict yielded by call_write. An exception is reported as a final "init" event with done=True.
+    """
+    data = request.json
+    writer_mode = data['writer_mode']
+    chunk_list = data['chunk_list']
+    chunk_span = data['chunk_span']
+    prompt_content = data['prompt_content']
+    x_chunk_length = data['x_chunk_length']
+    y_chunk_length = data['y_chunk_length']
+    main_model = data['main_model']
+    sub_model = data['sub_model']
+    global_context = data['global_context']
+    only_prompt = data['only_prompt']
+    # Always bind max_thread_num: requests without 'settings' use the configured default.
+    max_thread_num = (data.get('settings') or {}).get('MAX_THREAD_NUM', MAX_THREAD_NUM)
+
+    # Generate unique stream ID
+    stream_id = str(time.time())
+    active_streams[stream_id] = True
+
+    def generate():
+        try:
+            # Send stream ID to client
+            yield f"data: {json.dumps({'stream_id': stream_id})}\n\n"
+            for result in call_write(writer_mode, list(chunk_list), global_context, chunk_span, prompt_content, x_chunk_length, y_chunk_length, main_model, sub_model, max_thread_num, only_prompt):
+                if not active_streams.get(stream_id, False):
+                    # Stream was stopped by client
+                    print(f"Stream was stopped by client: {stream_id}")
+                    return
+                yield f"data: {json.dumps(result)}\n\n"
+        except Exception as exc:
+            # Keep the first two cells of every row in the requested span and put the error text after them.
+            error_msg = f"创作出错：\n{str(exc)}"
+            error_chunk_list = [[*row[:2], error_msg] for row in chunk_list[chunk_span[0]:chunk_span[1]]]
+            error_data = {
+                "done": True,
+                "chunk_type": "init",
+                "chunk_list": error_chunk_list
+            }
+            yield f"data: {json.dumps(error_data)}\n\n"
+        finally:
+            # Clean up stream tracking
+            active_streams.pop(stream_id, None)
+
+    return Response(generate(), mimetype='text/event-stream')
+
+
+@app.route('/summary', methods=['POST'])
+def process_novel_text():
+    """Stream novel-summary progress as server-sent events, at most one event per 0.2 s.
+
+    The final result is always sent exactly once and dumped to tmp.yaml; errors become a 'progress_msg' event.
+    """
+    data = request.json
+    content = data['content']
+    novel_name = data['novel_name']
+
+    # Generate unique stream ID
+    stream_id = str(time.time())
+    active_streams[stream_id] = True
+
+    def generate():
+        try:
+            yield f"data: {json.dumps({'stream_id': stream_id})}\n\n"
+
+            main_model = get_model_config_from_provider_model(data['main_model'])
+            sub_model = get_model_config_from_provider_model(data['sub_model'])
+            max_novel_summary_length = data['settings']['MAX_NOVEL_SUMMARY_LENGTH']
+            max_thread_num = data['settings']['MAX_THREAD_NUM']
+            last_yield_time = 0
+            last_event, last_sent = None, True  # text of the newest event and whether it already went out
+            for result in process_novel(content, novel_name, main_model, sub_model, max_novel_summary_length, max_thread_num):
+                if not active_streams.get(stream_id, False):
+                    # Stream was stopped by client
+                    print(f"Stream was stopped by client: {stream_id}")
+                    return
+                current_time = time.time()
+                last_event = f"data: {json.dumps(result)}\n\n"
+                last_sent = current_time - last_yield_time >= 0.2
+                if last_sent:
+                    last_yield_time = current_time
+                    yield last_event
+            if last_event is not None:
+                # NOTE(review): debug dump of the final result kept from the original code; the file is overwritten on every request.
+                import yaml
+                with open('tmp.yaml', 'w', encoding='utf-8') as f:
+                    yaml.dump(json.loads(last_event[len('data: '):]), f, allow_unicode=True)
+                if not last_sent:
+                    yield last_event  # the throttle held back the final result; send it exactly once
+        except Exception as e:
+            error_data = {"progress_msg": f"处理出错：{str(e)}"}
+            yield f"data: {json.dumps(error_data)}\n\n"
+        finally:
+            # Clean up stream tracking
+            active_streams.pop(stream_id, None)
+
+    return Response(generate(), mimetype='text/event-stream')
+
+# stream_id -> True while a streaming response should keep running; set to False to ask it to stop.
+active_streams = {}
+
+@app.route('/stop_stream', methods=['POST'])
+def stop_stream():
+    """Flag the stream named by the posted 'stream_id' as stopped; the reply is always success."""
+    requested_id = request.json.get('stream_id')
+    if requested_id in active_streams:
+        active_streams[requested_id] = False
+    return jsonify({'success': True})
+
+if __name__ == '__main__':  # only when the file is executed directly, not when it is imported
+    app.run(host=BACKEND_HOST, port=BACKEND_PORT, debug=False) 
\ No newline at end of file
diff --git a/backend_utils.py b/backend_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..4c9fd99b24cd5403ef646842f7b0ba01d716b39f
--- /dev/null
+++ b/backend_utils.py
@@ -0,0 +1,22 @@
+from llm_api import ModelConfig
+
+def get_model_config_from_provider_model(provider_model):
+    """Turn a "provider/model" string into a ModelConfig built from API_SETTINGS[provider].
+
+    For the 'doubao' provider, the endpoint id at the model's position in
+    'available_models' is passed as 'endpoint_id' ('' when that position has none).
+    The 'available_models' and 'endpoint_ids' lists themselves are never passed on.
+    """
+    from config import API_SETTINGS
+    provider, model = provider_model.split('/', 1)
+    provider_config = API_SETTINGS[provider]
+
+    list_keys = ('available_models', 'endpoint_ids')
+    model_config = {key: value for key, value in provider_config.items() if key not in list_keys}
+    model_config['model'] = model
+    if provider == 'doubao':
+        position = provider_config['available_models'].index(model)
+        endpoint_ids = provider_config['endpoint_ids']
+        model_config['endpoint_id'] = endpoint_ids[position] if position < len(endpoint_ids) else ''
+
+    return ModelConfig(**model_config)
\ No newline at end of file
diff --git a/config.py b/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..661978cfb3cc3e9bea4321183de28c6112727c84
--- /dev/null
+++ b/config.py
@@ -0,0 +1,81 @@
+import os
+from dotenv import dotenv_values, load_dotenv
+
+print("Loading .env file...")
+env_path = os.path.join(os.path.dirname(__file__), '.env')
+if os.path.exists(env_path):
+    # Keys declared without a value come back as None, which os.environ cannot store.
+    env_dict = {key: value for key, value in dotenv_values(env_path).items() if value is not None}
+    # Log only the variable names: the values include API keys and must not reach the logs.
+    print("Environment variables to be loaded:")
+    for key in env_dict:
+        print(key)
+    print("-" * 50)
+    os.environ.update(env_dict)
+    print(f"Loaded environment variables from: {env_path}")
+else:
+    print("Warning: .env file not found")
+
+
+# Thread Configuration
+MAX_THREAD_NUM = int(os.getenv('MAX_THREAD_NUM', 5))
+
+# Maximum text length processed when a novel is imported (default 20000).
+MAX_NOVEL_SUMMARY_LENGTH = int(os.getenv('MAX_NOVEL_SUMMARY_LENGTH', 20000))
+
+# MongoDB Configuration
+ENABLE_MONOGODB = os.getenv('ENABLE_MONGODB', 'false').lower() == 'true'  # NOTE(review): constant is spelled "MONOGODB"; the variable read is ENABLE_MONGODB
+MONGODB_URI = os.getenv('MONGODB_URI', 'mongodb://127.0.0.1:27017/')
+MONOGODB_DB_NAME = os.getenv('MONGODB_DB_NAME', 'llm_api')  # NOTE(review): same "MONOGODB" spelling
+ENABLE_MONOGODB_CACHE = os.getenv('ENABLE_MONGODB_CACHE', 'true').lower() == 'true'  # NOTE(review): same "MONOGODB" spelling
+CACHE_REPLAY_SPEED = float(os.getenv('CACHE_REPLAY_SPEED', 2))
+CACHE_REPLAY_MAX_DELAY = float(os.getenv('CACHE_REPLAY_MAX_DELAY', 5))
+
+# API Cost Limits
+API_COST_LIMITS = {
+    'HOURLY_LIMIT_RMB': float(os.getenv('API_HOURLY_LIMIT_RMB', 100)),
+    'DAILY_LIMIT_RMB': float(os.getenv('API_DAILY_LIMIT_RMB', 500)),
+    'USD_TO_RMB_RATE': float(os.getenv('API_USD_TO_RMB_RATE', 7))
+}
+
+# API Settings: one entry per provider; the comma-separated *_AVAILABLE_MODELS / DOUBAO_ENDPOINT_IDS values are split into lists.
+API_SETTINGS = {
+    'wenxin': {
+        'ak': os.getenv('WENXIN_AK', ''),
+        'sk': os.getenv('WENXIN_SK', ''),
+        'available_models': os.getenv('WENXIN_AVAILABLE_MODELS', '').split(','),
+        'max_tokens': 4096,
+    },
+    'doubao': {
+        'api_key': os.getenv('DOUBAO_API_KEY', ''),
+        'endpoint_ids': os.getenv('DOUBAO_ENDPOINT_IDS', '').split(','),
+        'available_models': os.getenv('DOUBAO_AVAILABLE_MODELS', '').split(','),
+        'max_tokens': 4096,
+    },
+    'gpt': {
+        'base_url': os.getenv('GPT_BASE_URL', ''),
+        'api_key': os.getenv('GPT_API_KEY', ''),
+        'proxies': os.getenv('GPT_PROXIES', ''),
+        'available_models': os.getenv('GPT_AVAILABLE_MODELS', '').split(','),
+        'max_tokens': 4096,
+    },
+    'zhipuai': {
+        'api_key': os.getenv('ZHIPUAI_API_KEY', ''),
+        'available_models': os.getenv('ZHIPUAI_AVAILABLE_MODELS', '').split(','),
+        'max_tokens': 4096,
+    },
+    'local': {
+        'base_url': os.getenv('LOCAL_BASE_URL', ''),
+        'api_key': os.getenv('LOCAL_API_KEY', ''),
+        'available_models': os.getenv('LOCAL_AVAILABLE_MODELS', '').split(','),
+        'max_tokens': 4096,
+    }
+}
+# Strip whitespace around each model name; an unset variable still yields [''] because ''.split(',') == [''].
+for model in API_SETTINGS.values():
+    model['available_models'] = [e.strip() for e in model['available_models']]
+
+DEFAULT_MAIN_MODEL = os.getenv('DEFAULT_MAIN_MODEL', 'wenxin/ERNIE-Novel-8K')
+DEFAULT_SUB_MODEL = os.getenv('DEFAULT_SUB_MODEL', 'wenxin/ERNIE-3.5-8K')
+# Like the other boolean flags, this is true only for the case-insensitive string 'true'.
+ENABLE_ONLINE_DEMO = os.getenv('ENABLE_ONLINE_DEMO', 'false').lower() == 'true'
\ No newline at end of file
diff --git a/core/__init__.py b/core/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a010e5f6b7b65ef88ee6e07de16bc89321935bf4
--- /dev/null
+++ b/core/__init__.py
@@ -0,0 +1 @@
+# core 模块为LongNovelGPT到2.0版本之间的过渡，在core模块中进行一些新功能和设计的尝试
diff --git a/core/backend.py b/core/backend.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9e283586a3a05688e02c23d55b94ea76745193a
--- /dev/null
+++ b/core/backend.py
@@ -0,0 +1,218 @@
+import time
+import importlib
+from core.draft_writer import DraftWriter
+from core.plot_writer import PlotWriter
+from core.outline_writer import OutlineWriter
+from core.writer_utils import KeyPointMsg
+from core.diff_utils import match_span_by_char
+import copy
+import types
+
+def load_novel_writer(writer, setting) -> DraftWriter:
+    """Instantiate the writer class for the state entry named by writer['current_w'].
+
+    'draft_w' -> DraftWriter, 'outline_w' -> OutlineWriter, 'chapters_w'/'plot_w' -> PlotWriter;
+    any other name raises ValueError. xy_pairs defaults to a single empty pair.
+    """
+    active_name = writer['current_w']
+    state = writer[active_name]
+
+    init_args = {
+        'xy_pairs': list(state.get('xy_pairs', [['', '']])),
+        'model': setting['model'],
+        'sub_model': setting['sub_model'],
+        'x_chunk_length': state['x_chunk_length'],
+        'y_chunk_length': state['y_chunk_length'],
+    }
+
+    if active_name == 'draft_w':
+        return DraftWriter(**init_args)
+    if active_name == 'outline_w':
+        return OutlineWriter(**init_args)
+    if active_name in ('chapters_w', 'plot_w'):
+        return PlotWriter(**init_args)
+    raise ValueError(f"unknown writer: {active_name}")
+
+def dump_novel_writer(writer, novel_writer, apply_chunks=None, cost=0, currency_symbol='￥'):
+    """Return a deep copy of ``writer`` whose current entry mirrors ``novel_writer``.
+
+    The entry named by writer['current_w'] receives novel_writer.xy_pairs (as a list), the
+    given cost / currency symbol and ``apply_chunks`` (a fresh empty dict when omitted).
+    The ``writer`` argument itself is not modified.
+    """
+    # TODO: dump should not touch the caller's writer by design, though this deep copy may be slow.
+    new_writer = copy.deepcopy(writer)
+    current_w = new_writer[new_writer['current_w']]
+
+    current_w['xy_pairs'] = list(novel_writer.xy_pairs)
+    current_w['current_cost'] = cost
+    current_w['currency_symbol'] = currency_symbol
+    # A None default avoids storing one shared mutable default object in every dumped state.
+    current_w['apply_chunks'] = {} if apply_chunks is None else apply_chunks
+
+    return new_writer
+    
+def call_write_long_novel(writer, setting):
+    """Drive the fixed outline -> plot -> draft op list, yielding whatever each op yields and the writer after each op.
+
+    Works on a deep copy of ``writer``; the 'eval'/'before_eval' strings are the literals below. Returns the final writer.
+    """
+    writer = copy.deepcopy(writer)
+    progress = writer['progress']
+    if not progress or True:  # NOTE(review): "or True" makes this unconditional, so the op list is rebuilt on every call
+        progress = dict(
+            cur_op_i = progress['cur_op_i'] if progress else 0,
+            ops = [
+                {
+                    'before_eval': 'writer["current_w"] = "outline_w"',
+                    'eval': 'call_write(writer, setting, False, "构思全书的大致剧情，并将其以一个故事的形式写下来，只写大致情节。")',
+                    'title': '创作大纲',
+                    'subtitle': '生成大纲'
+                },
+                {
+                    'eval': 'call_accept(writer, setting)',
+                },
+                {
+                    'eval': 'call_write(writer, setting, True, "对整个情节进行重写，使其更加有故事性。")',
+                    'title': '创作大纲',
+                    'subtitle': '润色大纲',
+                },
+                {
+                    'eval': 'call_accept(writer, setting)',
+                },
+                # The ops below create the plot.
+                {
+                    'before_eval': 'init_chapters_w(writer)',
+                    'eval': 'call_write(writer, setting, False, "丰富其中的剧情细节。")',
+                    'title': '创作剧情',
+                    'subtitle': '生成剧情'
+                },
+                {
+                    'eval': 'call_accept(writer, setting)',
+                },
+                {
+                    'eval': 'call_write(writer, setting, True, "对情节进行重写，使其有更多的剧情细节，同时更加有具有故事性。")',
+                    'title': '创作剧情',
+                    'subtitle': '扩充剧情',
+                },
+                {
+                    'eval': 'call_accept(writer, setting)',
+                },
+                # The ops below create the draft text.
+                {
+                    'before_eval': 'init_draft_w(writer)',
+                    'eval': 'call_write(writer, setting, False, "创作的是正文，而不是剧情，需要像一个小说家那样去描写这个故事。")',
+                    'title': '创作正文',
+                    'subtitle': '生成正文'
+                },
+                {
+                    'eval': 'call_accept(writer, setting)',
+                },
+                {
+                    'eval': 'call_write(writer, setting, True, "润色正文")',
+                    'title': '创作正文',
+                    'subtitle': '润色正文'
+                },
+                {
+                    'eval': 'call_accept(writer, setting)',
+                }
+            ]
+        )
+        # TODO: consider passing the context already in init_plot, similar to rewrite_plot
+        # Ops without their own title inherit the title/subtitle of the closest preceding op that has one.
+        title, subtitle = '', ''
+        for op in progress['ops']:
+            if 'title' not in op:
+                op['title'], op['subtitle'] = title, subtitle
+            else:
+                title, subtitle = op['title'], op['subtitle']
+    writer['progress'] = progress
+    yield writer
+
+    while progress['cur_op_i'] < len(progress['ops']):
+        current_op = progress['ops'][progress['cur_op_i']]
+        if 'before_eval' in current_op:
+            exec(current_op['before_eval'])  # NOTE(review): init_chapters_w / init_draft_w are not defined in this module
+        result = eval(current_op['eval'])
+        # call_accept returns a dict, not a generator: "yield from" on it would yield its keys and evaluate to None.
+        writer = (yield from result) if isinstance(result, types.GeneratorType) else result
+        progress = writer['progress']
+        progress['cur_op_i'] += 1
+        yield writer    # a bump of cur_op_i marks the yielded writer_state as a "stable version"
+    return writer
+
+def match_quote_text(writer, setting, quote_text):
+    """Find quote_text in the writer's y text; return (aligned span, matched text), or (None, '') unless the match ratio exceeds 0.5."""
+    target = load_novel_writer(writer, setting)
+    full_text = target.y
+    raw_span, ratio = match_span_by_char(full_text, quote_text)
+    if not ratio > 0.5:
+        return None, ''
+    span, _ = target.align_span(y_span=raw_span)
+    return span, full_text[span[0]:span[1]]
+
+# Backend entry point: takes a copy of the front end's writer_state as input.
+# Yielded values are normally only used by the front end to display progress;
+# only the returned value is considered for updating writer_state.
+def call_write(writer, setting, auto_write=False, suggestion=None):
+    """Generator: run write()/auto_write() on the current writer, yielding KeyPointMsg items and interim writer states.
+
+    Returns ``writer`` with the final diff stored under 'apply_chunks' and the last prompt outputs attached.
+    """
+    novel_writer = load_novel_writer(writer, setting)
+
+    current_w = writer[writer['current_w']]
+    current_w['xy_pairs'] = list(novel_writer.xy_pairs)
+    quote_span = writer['quote_span']
+
+    if auto_write:
+        assert quote_span is None, "auto_write模式下，不能有quote_text"
+        generator = novel_writer.auto_write()
+    else:
+        # TODO: writer.write should work for any prompt whether y is empty or already has content,
+        # i.e. a separate "new draft" prompt may exist, but "expand draft" should achieve the same.
+        generator = novel_writer.write(suggestion, y_span=quote_span)
+
+    prompt_outputs = []
+    for kp_msg in generator:
+        if isinstance(kp_msg, KeyPointMsg):
+            # To support saving at key points, an edit-level change would be computed and a writer yielded here.
+            yield kp_msg
+            continue
+        chunk_list = kp_msg
+
+        current_cost = 0
+        currency_symbol = '￥'  # same default as dump_novel_writer; keeps the name bound when no chunk has output yet
+        apply_chunks = []
+        prompt_outputs.clear()
+        for entry in chunk_list:
+            # Skip unprocessed chunks (None) and chunks without an LLM output; mirrors the None checks app.py applies to writer.write() output.
+            if entry is None or entry[0] is None: continue
+            output, chunk = entry
+            prompt_outputs.append(output)
+            current_cost += output['response_msgs'].cost
+            currency_symbol = output['response_msgs'].currency_symbol
+            cost_info = f"\n(预计花费：{output['response_msgs'].cost:.4f}{output['response_msgs'].currency_symbol})"
+            current_text = ("正在建立映射关系..." if 'plot2text' in output else output['text']) + cost_info + '\n'
+            apply_chunks.append((chunk, 'y_chunk', current_text))
+
+        new_writer = dump_novel_writer(writer, novel_writer, apply_chunks=apply_chunks, cost=current_cost, currency_symbol=currency_symbol)
+        new_writer['prompt_outputs'] = prompt_outputs
+        yield new_writer
+
+    # Compute an edit-level diff for the front end to display; diffing is a display concern and is meant to move out of the writer later.
+    apply_chunks = [(chunk, key, value) for chunk, key, value in load_novel_writer(writer, setting).diff_to(novel_writer)]
+    writer[writer['current_w']]['apply_chunks'] = apply_chunks
+    writer['prompt_outputs'] = prompt_outputs
+    return writer
+
+def call_accept(writer, setting):
+    """Apply the pending 'apply_chunks' of the current entry and return the re-dumped writer state."""
+    active = writer[writer['current_w']]
+    novel_writer = load_novel_writer(writer, setting)
+
+    for pending in active['apply_chunks']:
+        chunk, key, text = pending
+        novel_writer.apply_chunk(chunk, key, text)
+
+    return dump_novel_writer(writer, novel_writer)
diff --git a/core/diff_utils.py b/core/diff_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..67c86532d8bc7a2f132c2b8e7f2d4bddfb5a79bb
--- /dev/null
+++ b/core/diff_utils.py
@@ -0,0 +1,173 @@
+import difflib
+from difflib import SequenceMatcher
+
+
+def match_span_by_char(text, chunk):
+    """Find the region of ``text`` that character-matches ``chunk``.
+
+    Args:
+        text: string that is searched.
+        chunk: string whose characters are looked for in ``text``.
+
+    Returns:
+        ``((start, end), ratio)`` where ``start``/``end`` span from the first
+        to the last matching run inside ``text`` and ``ratio`` is the number of
+        matched characters divided by ``len(chunk)``; ``(None, 0)`` if nothing matches.
+    """
+    blocks = difflib.SequenceMatcher(None, text, chunk).get_matching_blocks()
+    # The last block is a zero-length sentinel; only real matches are kept.
+    runs = [(blk.a, blk.a + blk.size) for blk in blocks if blk.size]
+    if not runs:
+        return None, 0
+    matched = sum(end - start for start, end in runs)
+    return (runs[0][0], runs[-1][1]), matched / len(chunk)
+
+def match_sequences(a_list, b_list):
+    """
+    Align two lists of strings window by window and return the matched index ranges.
+    
+    Args:
+        a_list: first list of strings
+        b_list: second list of strings
+    
+    Returns:
+        list[((l,r), (j,k))]: matched index ranges, where (l,r) is the
+        half-open range in a_list and (j,k) the half-open range in b_list
+    """
+    m, n = len(a_list) - 1, len(b_list) - 1
+    matches = []
+    i = j = 0
+    # m and n are the LAST indices: the loop never consumes the final item of either list
+    while i < m and j < n:
+        # Reset the best candidate for the current position
+        best_match = None
+        best_ratio = -1  # below every real score (scores are >= 0), so the first candidate always wins
+        
+        # Try the different span combinations that start at the current position
+        for l in range(i, min(i + 3, m)):  # look ahead at most 3 items
+            current_a = ''.join(a_list[i:l + 1])
+            
+            for r in range(j, min(j + 3, n)):  # look ahead at most 3 items
+                current_b = ''.join(b_list[j:r + 1])
+                
+                # Score the pair with match_span_by_char in both directions
+                span1, ratio1 = match_span_by_char(current_b, current_a)
+                span2, ratio2 = match_span_by_char(current_a, current_b)
+                ratio = ratio1 * ratio2
+
+                if ratio > best_ratio:
+                    best_ratio = ratio
+                    best_match = ((i, l + 1), (j, r + 1))
+        
+        if best_match:
+            matches.append(best_match)
+            i = best_match[0][1]
+            j = best_match[1][1]
+        else:
+            # Fallback if no candidate was recorded: advance both cursors by one
+            i += 1
+            j += 1
+    # Everything that was not consumed above forms the last pair
+    matches.append(((i, m+1), (j, n+1)))
+    
+    return matches
+
+def get_chunk_changes(source_chunk_list, target_chunk_list):
+    """Align two chunk lists and return the index ranges that correspond to each other.
+
+    Both lists are joined with a separator and diffed character by character; each
+    chunk is classified by the diff operation covering most of its characters.
+
+    Args:
+        source_chunk_list: list of strings before the change.
+        target_chunk_list: list of strings after the change.
+
+    Returns:
+        list of ``(source_start, source_end, target_start, target_end)`` tuples;
+        each pairs the half-open range ``source_chunk_list[source_start:source_end]``
+        with ``target_chunk_list[target_start:target_end]``.
+    """
+    SEPARATOR = "%|%"
+    GAP, KEPT = 'delete_or_insert', 'replace_or_equal'
+
+    def chunk_bounds(chunk_list, joined_length):
+        # Offsets at which each chunk starts in the joined text, closed by the
+        # total length; a chunk's range includes the separator that follows it.
+        bounds, cursor = [0], 0
+        for chunk in chunk_list[:-1]:
+            cursor += len(chunk) + len(SEPARATOR)
+            bounds.append(cursor)
+        bounds.append(joined_length)
+        return bounds
+
+    def tally(bounds, counters, start, end, tag):
+        # Credit every chunk with the characters of [start, end) that fall inside it.
+        for counter, lo, hi in zip(counters, bounds, bounds[1:]):
+            overlap = min(hi, end) - max(lo, start)
+            if overlap > 0:
+                counter[tag] += overlap
+
+    def gap_flags(counters):
+        # A chunk counts as a gap (deleted/inserted) only on a strict majority.
+        return [counter[GAP] > counter[KEPT] for counter in counters]
+
+    source_text = SEPARATOR.join(source_chunk_list)
+    target_text = SEPARATOR.join(target_chunk_list)
+    source_bounds = chunk_bounds(source_chunk_list, len(source_text))
+    target_bounds = chunk_bounds(target_chunk_list, len(target_text))
+    source_counters = [{GAP: 0, KEPT: 0} for _ in source_chunk_list]
+    target_counters = [{GAP: 0, KEPT: 0} for _ in target_chunk_list]
+
+    for tag, i1, i2, j1, j2 in SequenceMatcher(None, source_text, target_text).get_opcodes():
+        if tag in ('replace', 'equal'):
+            tally(source_bounds, source_counters, i1, i2, KEPT)
+            tally(target_bounds, target_counters, j1, j2, KEPT)
+        elif tag == 'delete':
+            tally(source_bounds, source_counters, i1, i2, GAP)
+        elif tag == 'insert':
+            tally(target_bounds, target_counters, j1, j2, GAP)
+
+    source_gap = gap_flags(source_counters)
+    target_gap = gap_flags(target_counters)
+    # Walk both lists with one cursor each, emitting one change per run.
+    changes = []
+    i = j = start_i = start_j = 0
+    m, n = len(source_chunk_list), len(target_chunk_list)
+    while i < m or j < n:
+        if i < m and source_gap[i]:
+            while i < m and source_gap[i]:
+                i += 1
+        elif j < n and target_gap[j]:
+            while j < n and target_gap[j]:
+                j += 1
+        elif i < m and j < n:
+            while i < m and j < n and not (source_gap[i] or target_gap[j]):
+                i += 1
+                j += 1
+        else:
+            # TODO: the algorithm still has issues matching up 'equal' chunks
+            break
+        # Every branch above moves at least one cursor, so a change is always due.
+        changes.append((start_i, i, start_j, j))
+        start_i, start_j = i, j
+    if i < m or j < n:
+        changes.append((start_i, m, start_j, n))
+    return changes
+
+
+# Usage example
+def test_get_chunk_changes():
+    """Print the chunk alignment computed for a small hand-written example."""
+    source_chunks = ['', '', '', '第3章 初露锋芒\n在高人指导下，萧炎的斗气水平迅速提升，开始在家族中引起注意。\n', '',  '第4章 异火初现\n萧炎得知“异火”的存在，决定踏上寻找异火的旅程。\n']
+    target_chunks = ['', '第3章 初露锋芒\n在高人指导下，萧炎的斗气水平迅速提升，开始在家族中引起注意。', '第3.5章 家族试炼\n萧炎参加家族举办的试炼，凭借新学的斗技和炼丹术，展现出超凡实力，获得家族长老的关注和认可。', '第4章 异火初现\n萧炎得知“异火”的存在，决定踏上寻找异火的旅程。']
+    changes = get_chunk_changes(source_chunks, target_chunks)
+
+    for src_lo, src_hi, tgt_lo, tgt_hi in changes:
+        print(f"Source chunks {src_lo}:{src_hi} -> Target chunks {tgt_lo}:{tgt_hi}")
+
+    for src_lo, src_hi, tgt_lo, tgt_hi in changes:
+        print('-' * 20)
+        print(f"{''.join(source_chunks[src_lo:src_hi])} -> {''.join(target_chunks[tgt_lo:tgt_hi])}")
+
+if __name__ == "__main__":  # run the example only when executed as a script
+    test_get_chunk_changes()
\ No newline at end of file
diff --git a/core/draft_writer.py b/core/draft_writer.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4633cc3a2aacda1b09f3a5503136d74bca93b8e
--- /dev/null
+++ b/core/draft_writer.py
@@ -0,0 +1,46 @@
+from core.writer_utils import KeyPointMsg
+from core.writer import Writer
+
+from prompts.创作正文.prompt import main as prompt_draft
+from prompts.提炼.prompt import main as prompt_summary
+
+
+class DraftWriter(Writer):
+    """Writer whose x side is the plot (剧情) and whose y side is the draft text (正文)."""
+
+    def __init__(self, xy_pairs, global_context, model=None, sub_model=None, x_chunk_length=500, y_chunk_length=1000, max_thread_num=5):
+        super().__init__(xy_pairs, global_context, model, sub_model, x_chunk_length=x_chunk_length, y_chunk_length=y_chunk_length, max_thread_num=max_thread_num)
+
+    def write(self, user_prompt, pair_span=None):
+        """Generate draft text for ``pair_span`` from its plot; raises if the plot has fewer than 5 characters."""
+        target_chunk = self.get_chunk(pair_span=pair_span)
+        if not target_chunk.x_chunk:
+            raise Exception("需要提供剧情。")
+        # The message promises "not fewer than 5", so exactly 5 characters must pass.
+        if len(target_chunk.x_chunk) < 5:
+            raise Exception("剧情不能少于5个字。")
+        chunks = self.get_chunks(pair_span)
+        yield from self.batch_write_apply_text(chunks, prompt_draft, user_prompt)
+
+    def summary(self, pair_span=None):
+        """Distil the draft text of ``pair_span`` back into plot; raises if it has fewer than 5 characters."""
+        target_chunk = self.get_chunk(pair_span=pair_span)
+        if not target_chunk.y_chunk:
+            raise Exception("没有正文需要总结。")
+        if len(target_chunk.y_chunk) < 5:
+            raise Exception("需要总结的正文不能少于5个字。")
+        # Split into smaller chunks first, otherwise get_chunks cannot work properly
+        new_target_chunk = self.map_text_wo_llm(target_chunk)
+        self.apply_chunks([target_chunk], [new_target_chunk])
+        chunk_span = self.get_chunk_pair_span(new_target_chunk)
+        chunks = self.get_chunks(chunk_span, context_length_ratio=0)
+        yield from self.batch_write_apply_text(chunks, prompt_summary, "提炼剧情")
+
+    def split_into_chapters(self):
+        """Placeholder; not implemented."""
+
+    def get_model(self):
+        return self.model
+
+    def get_sub_model(self):
+        return self.sub_model
diff --git a/core/frontend.py b/core/frontend.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd3cdde46c1accf85249236653d4d45232360f26
--- /dev/null
+++ b/core/frontend.py
@@ -0,0 +1,435 @@
+import re
+from rich.traceback import install
+install(show_locals=False)
+
+import gradio as gr
+import yaml
+import functools
+import time
+import sys
+import os
+import copy
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from config import RENDER_SAVE_LOAD_BTN, RENDER_STOP_BTN
+from core.backend import call_write, call_accept, match_quote_text
+from core.frontend_copy import enable_copy_js, on_copy
+from core.frontend_setting import new_setting, render_setting
+from core.frontend_utils import (
+    title, info,
+    create_progress_md, create_text_md, messages2chatbot,
+    init_writer, has_accept, is_running, try_cancel, writer_y_is_empty, writer_x_is_empty,
+    cancellable, process_writer_to_backend, process_writer_from_backend,
+    init_chapters_w, init_draft_w
+)
+from core.writer_utils import KeyPointMsg
+
+from prompts.baseprompt import clean_txt_content, load_prompt
+
+
+# Load the idea examples from the YAML file
+with open('prompts/idea-examples.yaml', 'r', encoding='utf-8') as file:
+    examples_data = yaml.safe_load(file)
+
+# Build the example rows (one single-column row per idea)
+examples = [[example['idea']] for example in examples_data['examples']]
+
+with gr.Blocks(head=enable_copy_js) as demo:
+    gr.HTML(title)
+    with gr.Accordion("使用指南"):
+        gr.Markdown(info)
+    # State objects handed to the event handlers below
+    writer_state = gr.State(init_writer('', check_empty=False))
+    setting_state = gr.State(new_setting())
+    # The save/load controls are only created when enabled in config
+    if RENDER_SAVE_LOAD_BTN:
+        with gr.Row():
+            save_button = gr.Button("保存状态")
+            load_button = gr.Button("加载状态")
+            save_file_name = gr.Textbox(value='states', placeholder='输入文件名', lines=1, label=None, show_label=False, container=False)
+
+    def save_states(save_file_name, writer, setting):
+        """Dump the writer and setting states to ``<save_file_name>.json``."""
+        import json
+        json_file_name = save_file_name + '.json'
+        # NOTE(review): the name comes straight from a textbox and is not sanitised here.
+        payload = {'writer': writer, 'setting': setting}
+        with open(json_file_name, 'w', encoding='utf-8') as f:
+            json.dump(payload, f, ensure_ascii=False, indent=2)
+        gr.Info(f"状态已保存到文件：{json_file_name}")
+
+    def load_states(save_file_name):
+        """Return ``(writer, setting)`` read from ``<save_file_name>.json``; gr.Error if it is missing."""
+        import json
+        json_file_name = save_file_name + '.json'
+        try:
+            with open(json_file_name, 'r', encoding='utf-8') as f:
+                states = json.load(f)
+        except FileNotFoundError:
+            raise gr.Error(f"未找到保存的状态文件：{json_file_name}")
+        gr.Info(f"状态文件已加载：{json_file_name}")
+        # Stamp the setting so that it gets rendered: the setting UI must hold the same object as setting_state
+        states['setting']['render_time'] = time.time()
+        return states['writer'], states['setting']
+
+    idea_textbox = gr.Textbox(placeholder='用一段话描述你要写的小说，或者从下方示例中选择一个创意...', lines=2, scale=1, label=None, show_label=False, container=False, max_length=1000)
+    # The examples are wired to the idea textbox
+    gr.Examples(
+        label='示例',
+        examples=examples,
+        inputs=[idea_textbox],
+    )
+    # Stage buttons (outline, plot, draft); the last two start disabled
+    with gr.Row():
+        outline_btn = gr.Button("创作大纲", scale=1, min_width=1, interactive = True, variant='primary')
+        chapters_btn = gr.Button("创作剧情", scale=1, min_width=1, interactive = False, variant='secondary')
+        draft_btn = gr.Button("创作正文", scale=1, min_width=1, interactive = False, variant='secondary')
+        auto_checkbox = gr.Checkbox(label='一键生成', scale=1, value=False, visible=False)  # TODO: as of V1.10 the "auto" mode is not mature yet, so it stays hidden
+
+    progress_md = create_progress_md(writer_state.value)
+    text_md = create_text_md(writer_state.value)
+
+    @gr.render(inputs=writer_state)
+    def create_prompt_preview(writer):
+        """Render the "Prompt预览" accordion from ``writer['prompt_outputs']`` (one tab per prompt)."""
+        prompt_outputs = writer['prompt_outputs'] if 'prompt_outputs' in writer else []
+        with gr.Accordion("Prompt预览", open=bool(prompt_outputs)):
+            pause_on_prompt_finished_checkbox = gr.Checkbox(label='允许在LLM响应完成后，预览Prompt', scale=1, value=writer['pause_on_prompt_finished_flag']) 
+            for i, prompt_output in enumerate(prompt_outputs, 1):
+                with gr.Tab(f"Prompt {i}"):
+                    gr.Chatbot(messages2chatbot(prompt_output['response_msgs']), type='messages')
+            if not prompt_outputs:
+                gr.Markdown('当前没有可预览的Prompt。')
+            # The continue button is only visible while there is something to preview
+            continue_btn = gr.Button('继续', visible=bool(prompt_outputs), variant='primary')
+
+        def on_pause_on_prompt_finished(value):
+            if value:
+                gr.Info("在LLM响应完成后，将可以预览Prompt")
+            writer['pause_on_prompt_finished_flag'] = value  # mutates the writer object this render received
+        
+        pause_on_prompt_finished_checkbox.change(on_pause_on_prompt_finished, [pause_on_prompt_finished_checkbox])
+
+        def on_continue(writer):
+            writer['pause_flag'] = False
+            writer['prompt_outputs'] = []
+            return writer
+        
+        continue_btn.click(on_continue, writer_state, writer_state)
+
+
+    with gr.Row():
+        rewrite_all_button = gr.Button("开始创作", min_width=100, scale=2, variant='secondary', interactive=False)
+        suggestion_dropdown = gr.Dropdown(choices=[], min_width=100, scale=2, label=None, show_label=False, container=False, allow_custom_value=False)
+        quote_checkbox = gr.Checkbox(label='允许引用', min_width=100, scale=2, value=False)
+        gr.Textbox('窗口大小：', container=False, text_align='right', scale=1, min_width=100)
+        chunk_length_dropdown = gr.Dropdown(choices=[], min_width=80, scale=1, label=None, show_label=False, container=False, allow_custom_value=False)
+    # Preview of the quoted text; starts hidden
+    quote_md = gr.Markdown(visible=False)
+    
+    def on_quote_checkbox_change(writer, value):
+        """Hide the quote preview; outside the outline stage also clear the stored quote."""
+        if writer['current_w'] == 'outline_w':
+            gr.Info("大纲创作不支持引用\n考虑在剧情和正文创作中使用吧~")
+            return gr.update(value=False, visible=False)
+
+        if value:
+            gr.Info("允许引用（右键或Ctrl+C复制你想引用的文本）")
+        writer.update({'quote_span': None, 'quoted_text': ''})
+        return gr.update(value=None, visible=False)
+
+    quote_checkbox.change(on_quote_checkbox_change, [writer_state, quote_checkbox], [quote_md])
+
+    def on_chunk_length_change(writer, value):
+        """Store ``value`` as the active sub-writer's ``y_chunk_length`` and echo it to the dropdown."""
+        writer[writer['current_w']]['y_chunk_length'] = value
+        return gr.update(value=value)
+
+    chunk_length_dropdown.change(on_chunk_length_change, [writer_state, chunk_length_dropdown], [chunk_length_dropdown])
+
+    def on_copy_handle(text, writer, setting, quote_checkbox):
+        """Store copied text as the active quote and return the update for the quote preview."""
+        text = text.strip()
+        rejection = None
+        if has_accept(writer):
+            rejection = '考虑先接受或拒绝修改哦~'
+        elif len(text) < 10:
+            rejection = '选中的文本太短，无法引用'
+        if rejection is not None:
+            gr.Info(rejection)
+            return gr.update(visible=False)
+
+        quote_span = quoted_text = None
+        if quote_checkbox:
+            quote_span, quoted_text = match_quote_text(writer, setting, text)
+            if not quote_span:
+                gr.Info('未找到匹配的引用文本')
+        if not quote_span:
+            writer['quote_span'] = None
+            writer['quoted_text'] = ''
+            return gr.update(visible=False)
+
+        writer['quote_span'] = quote_span
+        writer['quoted_text'] = quoted_text
+        preview = quoted_text.split('\n')
+        if len(preview) > 10:
+            preview[5:-5] = ['......']
+        fenced = ['```', *preview, '```']
+        return gr.update(value='\n'.join("> " + line for line in fenced), visible=True)
+
+    on_copy(on_copy_handle, [writer_state, setting_state, quote_checkbox], [quote_md])
+    
+
+    suggestion_textbox = gr.Textbox(max_length=1000, placeholder='在这里输入你的意见，或者从右上单选框选择', lines=2, scale=1, label=None, show_label=False, container=False)
+    # Action row: accept / pause / cancel / refresh
+    with gr.Row():
+        accept_button = gr.Button("接受", scale=1, min_width=1, variant='secondary', interactive=False)
+        pause_button = gr.Button("暂停", scale=1, min_width=1, variant='secondary', visible=RENDER_STOP_BTN)
+        stop_button = gr.Button("取消", scale=1, min_width=1, variant='secondary')
+        flash_button = gr.Button("刷新", scale=1, min_width=1, variant='secondary')
+
+    def flash_interface(writer):
+        """Recompute every widget update that depends on the writer state.
+
+        Returns:
+            tuple of updates in the order text, progress, rewrite button, accept
+            button, outline/chapters/draft buttons, pause button, suggestion
+            dropdown, chunk-length dropdown.
+        """
+        current_w_name = writer['current_w']
+
+        can_accept_flag = has_accept(writer) and not is_running(writer)
+        can_write_flag = not writer_x_is_empty(writer, current_w_name) and not can_accept_flag
+
+        def highlight(active):
+            # Active controls use the primary style, everything else the secondary one.
+            return 'primary' if active else 'secondary'
+
+        # Every stage uses the same rewrite-button update, so it is built once for all of them.
+        rewrite_all_button = gr.update(
+            value='开始创作',
+            variant=highlight(can_write_flag),
+            interactive=can_write_flag,
+        )
+        accept_button = gr.update(variant=highlight(can_accept_flag), interactive=can_accept_flag)
+
+        # Later stages are only clickable once the previous stage has output.
+        outline_btn = gr.update(variant=highlight(current_w_name == 'outline_w'))
+        chapters_btn = gr.update(
+            interactive=not writer_y_is_empty(writer, 'outline_w'),
+            variant=highlight(current_w_name == 'chapters_w'),
+        )
+        draft_btn = gr.update(
+            interactive=not writer_y_is_empty(writer, 'chapters_w'),
+            variant=highlight(current_w_name == 'draft_w'),
+        )
+        pause_button = gr.update(value="继续" if writer['pause_flag'] else "暂停", variant='secondary')
+
+        suggestion_choices = writer['suggestions'][current_w_name]
+        # TODO: as of V1.10 the "自动" (auto) choice is not mature yet, so it is not offered.
+        if writer_y_is_empty(writer, current_w_name):
+            suggestion_dropdown = gr.update(choices=suggestion_choices, value=suggestion_choices[0])
+        else:
+            suggestion_dropdown = gr.update(choices=suggestion_choices)
+
+        chunk_length_choices = writer['chunk_length'][current_w_name]
+        # Fall back to the first choice when no chunk length has been stored.
+        cur_chunk_length = writer[current_w_name].get('y_chunk_length', None)
+        chunk_length_dropdown = gr.update(
+            choices=chunk_length_choices,
+            value=cur_chunk_length or chunk_length_choices[0],
+        )
+
+        return (
+            create_text_md(writer),
+            create_progress_md(writer),
+            rewrite_all_button, accept_button,
+            outline_btn, chapters_btn, draft_btn,
+            pause_button, suggestion_dropdown, chunk_length_dropdown,
+        )
+
+    # Shared refresh event: flash_interface together with its input and the widgets it updates
+    flash_event = dict(
+        fn=flash_interface, 
+        inputs=[writer_state], 
+        outputs=[
+            text_md,
+            progress_md,
+            rewrite_all_button,
+            accept_button,
+            outline_btn,
+            chapters_btn,
+            draft_btn,
+            pause_button,
+            suggestion_dropdown,
+            chunk_length_dropdown
+        ]
+    )
+    # Every refresh trigger below reuses flash_event
+    flash_button.click(**flash_event)
+    if RENDER_SAVE_LOAD_BTN:
+        save_button.click(save_states, inputs=[save_file_name, writer_state, setting_state], outputs=[])
+        load_button.click(load_states, inputs=[save_file_name], outputs=[writer_state, setting_state]).success(**flash_event)
+    # stop_write_long_novel_button.click(on_cancel, inputs=[writer_state])
+    stop_button.click(try_cancel, inputs=[writer_state]).success(**flash_event).success(lambda :gr.update(), None, writer_state)
+    # TODO: the writer_state update triggered by stop_btn does not take effect
+
+    @cancellable
+    def _on_write_all(writer, setting, auto_write=False, suggestion=None):
+        """Drive ``call_write`` and stream ``(text markdown, writer_state update)`` pairs to the UI."""
+        current_w_name = writer['current_w']
+        if writer_x_is_empty(writer, current_w_name):
+            gr.Info('请先输入需要创作的内容！')
+            return
+        
+        writer['prompt_outputs'].clear()
+        # Re-validate the stored quote against the current text before writing
+        if writer['quote_span']:
+            quote_span, quoted_text = match_quote_text(writer, setting, writer['quoted_text'])
+            if quote_span != writer['quote_span'] or quoted_text != writer['quoted_text']:
+                raise gr.Error('引用文本不存在！')
+
+        generator = call_write(process_writer_to_backend(writer), setting, auto_write, suggestion)
+        # Latest writer snapshot yielded by the backend (None until the first one arrives)
+        new_writer = None
+        while True:
+            try:
+                kp_msg = next(generator)
+                if isinstance(kp_msg, KeyPointMsg):
+                    # TODO: KeyPointMsg's design makes this logic convoluted; consider simplifying later
+                    if kp_msg.is_prompt() and kp_msg.is_finished() and writer['pause_on_prompt_finished_flag']:
+                        gr.Info('LLM响应完成，可以预览Prompt')  
+                        writer['pause_flag'] = True
+                        if new_writer is None: continue
+                    elif kp_msg.is_title(): # TODO: title nodes do not implement the finish logic yet
+                        # if new_writer is not None:
+                        #     # this is a key point, so save the progress
+                        #     process_writer_from_backend(writer, new_writer)
+                        #     yield create_text_md(writer), writer
+                        #     gr.Info(f'已自动保存进度')
+                        continue
+                        # Saving at key points is complicated and buggy; planned for a later version
+                    else:
+                        continue
+                else:
+                    new_writer = kp_msg
+                
+                if writer['pause_flag']:
+                    writer['prompt_outputs'] = copy.deepcopy(new_writer['prompt_outputs'])  
+                    # Hand prompt_outputs over to writer_state so the prompts show while paused; a deep copy is required, otherwise the writer keeps changing and the prompts keep re-rendering
+                    yield create_text_md(new_writer), writer
+                    # Block here until pause_flag is cleared or cancel_flag is set
+                    while writer['pause_flag'] and not writer['cancel_flag']:
+                        time.sleep(0.1)
+                else:
+                    yield create_text_md(new_writer), gr.update()
+            except StopIteration as e:
+                # The generator's return value is the final state
+                process_writer_from_backend(writer, e.value)
+                yield create_text_md(writer), writer
+                if has_accept(writer):
+                    gr.Info('创作完成！点击接受按钮接受修改。')
+                else:
+                    gr.Info('本次创作没有任何更改。')  # usually because the review found nothing to change
+                return
+        
+    def on_auto_write_all(writer, setting, auto_write):
+        """Run a full automatic write when ``auto_write`` is set; otherwise yield nothing."""
+        if not auto_write:
+            # suggestion = writer['suggestions'][writer['current_w']][0]
+            # yield from _on_write_all(writer, setting, False, suggestion)
+            return
+        yield from _on_write_all(writer, setting, True)
+
+    writer_all_events = {
+        'fn': on_auto_write_all,
+        'queue': True,
+        'inputs': [writer_state, setting_state, auto_checkbox],
+        'outputs': [text_md, writer_state],
+        'concurrency_limit': 10,
+    }
+
+    def on_init_outline(idea, writer):
+        """Reset the outline stage from ``idea``; an empty idea only triggers a notice."""
+        if not idea.strip():
+            gr.Info("先输入小说简介或从示例中选择一个")
+            return gr.update()
+        carried_keys = ('current_w', 'outline_w', 'prompt_outputs')
+        fresh = init_writer(idea)
+        writer.update({key: value for key, value in fresh.items() if key in carried_keys})
+        return writer
+
+    outline_btn.click(on_init_outline, inputs=[idea_textbox, writer_state], outputs=[writer_state]).success(**writer_all_events).then(**flash_event)
+    chapters_btn.click(lambda writer: init_chapters_w(writer), inputs=[writer_state], outputs=[writer_state]).success(**writer_all_events).then(**flash_event)
+    draft_btn.click(lambda writer: init_draft_w(writer), inputs=[writer_state], outputs=[writer_state]).success(**writer_all_events).then(**flash_event)
+
+    def on_select_suggestion(writer, setting, choice):
+        """Load the prompt text for ``choice`` into the suggestion textbox ('自动' hides the textbox)."""
+        if choice == '自动':
+            return gr.update(value=choice, visible=False)
+
+        dirname = writer['suggestions_dirname'][writer['current_w']]
+        suggestion = clean_txt_content(load_prompt(dirname, choice))
+        # Strip the leading role marker, if there is one
+        suggestion = suggestion.removeprefix("user:\n")
+
+        return gr.update(value=suggestion, visible=True)
+
+    suggestion_dropdown.change(on_select_suggestion, inputs=[writer_state, setting_state, suggestion_dropdown], outputs=[suggestion_textbox])
+
+    def on_write_all(writer, setting, suggestion):
+        """Rewrite using the user's suggestion; a blank suggestion only shows a notice."""
+        if suggestion.strip():
+            yield from _on_write_all(writer, setting, False, suggestion)
+        else:
+            gr.Info('需要输入创作意见！')
+
+    rewrite_all_button.click(
+        on_write_all,
+        queue=True, concurrency_limit=10,
+        inputs=[writer_state, setting_state, suggestion_textbox],
+        outputs=[text_md, writer_state],
+    ).then(**flash_event)
+
+
+    @cancellable
+    def on_accept_write(writer, setting):
+        """Apply the pending edits of the active sub-writer and push the refreshed text to the UI."""
+        pending_chunks = writer[writer['current_w']]['apply_chunks']
+        if not pending_chunks:
+            # Nothing is pending, so there is nothing to accept
+            raise gr.Error('请先进行创作！')
+
+        accepted = call_accept(process_writer_to_backend(writer), setting)
+        process_writer_from_backend(writer, accepted)
+        yield create_text_md(writer), writer
+
+    accept_button.click(fn=on_accept_write, inputs=[writer_state, setting_state], outputs=[text_md, writer_state]).then(**flash_event)
+
+    def toggle_pause(writer):
+        """Flip ``writer['pause_flag']`` while a job is running and relabel the pause button."""
+        if not is_running(writer):
+            gr.Info('当前没有正在进行的操作')
+            return gr.update()
+        paused = not writer['pause_flag']
+        writer['pause_flag'] = paused
+        return gr.update(value="继续" if paused else "暂停")
+
+    pause_button.click(
+        toggle_pause,
+        inputs=[writer_state],
+        outputs=[pause_button],
+    )
+
+    @gr.render(inputs=setting_state)
+    def _render_setting(setting):  # thin wrapper: render_setting also receives the state component itself
+        return render_setting(setting, setting_state)
+
+
+demo.queue()
+demo.launch(server_name="0.0.0.0", server_port=7860)  # listens on all interfaces, port 7860
+#demo.launch()
+
+
diff --git a/core/frontend_copy.py b/core/frontend_copy.py
new file mode 100644
index 0000000000000000000000000000000000000000..436d0a6b790e63fcbf7837e548e07e2c39f9170b
--- /dev/null
+++ b/core/frontend_copy.py
@@ -0,0 +1,35 @@
+import gradio as gr
+# Script snippet used as ``head=`` of gr.Blocks: on a copy event it writes the selected text into #copy_textbox
+enable_copy_js = """
+<script>
+document.addEventListener('copy', function(e) {
+    // 获取选中的文本
+    var selectedText = window.getSelection().toString();
+    if(selectedText) {
+        // 直接触发 gradio 组件的更新
+        const textbox = document.getElementById('copy_textbox');
+        if(textbox) {
+            textbox.querySelector('textarea').value = selectedText;
+            // 触发 change 事件以更新 Gradio 状态
+            textbox.querySelector('textarea').dispatchEvent(new Event('input', { bubbles: true }));
+        }
+    }
+});
+</script>
+"""
+
+def on_copy(fn, inputs, outputs):
+    """Create the hidden ``copy_textbox`` and register ``fn`` on its change event, with the textbox prepended to ``inputs``."""
+    hidden_box = gr.Textbox(elem_id="copy_textbox", visible=False)
+    return hidden_box.change(fn, [hidden_box] + inputs, outputs)
+
+# with gr.Blocks(head=enable_copy_js) as demo:
+#     gr.Markdown("Hello\nTest Copy")
+#     copy_textbox = gr.Textbox(elem_id="copy_textbox", visible=False)
+
+#     def copy_handle(text):
+#         gr.Info(text)
+    
+#     copy_textbox.change(copy_handle, copy_textbox)
+    
+# demo.launch()
\ No newline at end of file
diff --git a/core/frontend_setting.py b/core/frontend_setting.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ded42c3786dfd0b2b2bbe0d38b5b87ae67d10d0
--- /dev/null
+++ b/core/frontend_setting.py
@@ -0,0 +1,345 @@
+import gradio as gr
+from enum import Enum, auto
+
+from llm_api import ModelConfig, wenxin_model_config, doubao_model_config, gpt_model_config, zhipuai_model_config, test_stream_chat
+from config import API_SETTINGS, RENDER_SETTING_API_TEST_BTN, ENABLE_SETTING_SELECT_SUB_MODEL
+
+
+class Provider:
+    GPT = "GPT(OpenAI)"
+    WENXIN = "文心(百度)"
+    DOUBAO = "豆包(字节跳动)"
+    ZHIPUAI = "GLM(智谱)"
+    OTHERS = '其他'
+
+def deep_update(d, u):
+    """Recursively update dictionary d with values from dictionary u"""
+    for k, v in u.items():
+        if isinstance(v, dict) and k in d and isinstance(d[k], dict):
+            deep_update(d[k], v)
+        else:
+            d[k] = v
+
+def new_setting():
+    model_config = API_SETTINGS.pop('model')
+    sub_model_config = API_SETTINGS.pop('sub_model')
+
+    new_setting = dict(
+        model=ModelConfig(**model_config),
+        sub_model=ModelConfig(**sub_model_config),
+        render_count=0,
+        provider_name=Provider.GPT,
+        wenxin={
+            'ak': '',
+            'sk': '',
+            'default_model': 'ERNIE-Novel-8K',
+            'default_sub_model': 'ERNIE-3.5-8K',
+            'available_models': list(wenxin_model_config.keys())
+        },
+        doubao={
+            'api_key': '',
+            'main_endpoint_id': '',
+            'sub_endpoint_id': '',
+            'default_model': 'doubao-pro-32k',
+            'default_sub_model': 'doubao-lite-32k',
+            'available_models': list(doubao_model_config.keys())
+        },
+        gpt={
+            'api_key': '',
+            'base_url': '',
+            'proxies': '',
+            'default_model': 'gpt-4o',
+            'default_sub_model': 'gpt-4o-mini',
+            'available_models': list(gpt_model_config.keys())
+        },
+        zhipuai={
+            'api_key': '',
+            'default_model': 'glm-4-plus',
+            'default_sub_model': 'glm-4-flashx',
+            'available_models': list(zhipuai_model_config.keys())
+        },
+        others={
+            'api_key': '',
+            'base_url': '',
+            'default_model': '',
+            'default_sub_model': '',
+            'available_models': []
+        }
+    )
+
+    deep_update(new_setting, API_SETTINGS)
+
+    return new_setting
+
+# Builds the "API 设置" accordion for `setting`; intended to be called from a @gr.render(inputs=setting_state) function.
+def render_setting(setting, setting_state):  # setting: settings dict (keys as built by new_setting); setting_state: component used as event output
+    with gr.Accordion("API 设置"):
+        with gr.Row():  # row 1: provider, main-model and sub-model dropdowns
+            provider_name = gr.Dropdown(
+                choices=[Provider.GPT, Provider.WENXIN, Provider.DOUBAO, Provider.ZHIPUAI, Provider.OTHERS],
+                value=setting['provider_name'],
+                label="模型提供商",
+                scale=1
+            )
+            # Record the newly picked provider in the dict and return it as the new value for setting_state.
+            def on_select_provider(provider_name):
+                setting['provider_name'] = provider_name
+                return setting
+            
+            provider_name.select(fn=on_select_provider, inputs=provider_name, outputs=[setting_state])
+            # Select the per-provider config dict. NOTE(review): no default case, so an unknown provider_name leaves provider_config unbound.
+            match setting['provider_name']:
+                case Provider.WENXIN:
+                    provider_config = setting['wenxin']
+                case Provider.DOUBAO:
+                    provider_config = setting['doubao']
+                case Provider.GPT:
+                    provider_config = setting['gpt']
+                case Provider.ZHIPUAI:
+                    provider_config = setting['zhipuai']
+                case Provider.OTHERS:
+                    provider_config = setting['others']
+            # Both model dropdowns allow free-typed values only when the provider is Provider.OTHERS.
+            main_model = gr.Dropdown(
+                choices=provider_config['available_models'],
+                value=provider_config['default_model'],
+                label="主模型",
+                scale=1,
+                allow_custom_value=setting['provider_name'] == Provider.OTHERS
+            )
+
+            sub_model = gr.Dropdown(
+                choices=provider_config['available_models'],
+                value=provider_config['default_sub_model'],
+                label="辅助模型",
+                scale=1,
+                allow_custom_value=setting['provider_name'] == Provider.OTHERS,
+                interactive=ENABLE_SETTING_SELECT_SUB_MODEL  # config flag decides whether the sub model can be changed
+            )
+        # Row 2: credential inputs for the selected provider (every box, including the base URL, uses type='password').
+        with gr.Row():
+            if setting['provider_name'] == Provider.WENXIN:
+                baidu_access_key = gr.Textbox(
+                    value=provider_config['ak'],
+                    label='Baidu Access Key',
+                    lines=1,
+                    placeholder='Enter your Baidu access key here',
+                    interactive=True,
+                    scale=10,
+                    type='password'
+                )
+                baidu_secret_key = gr.Textbox(
+                    value=provider_config['sk'],
+                    label='Baidu Secret Key',
+                    lines=1,
+                    placeholder='Enter your Baidu secret key here',
+                    interactive=True,
+                    scale=10,
+                    type='password'
+                )
+
+            elif setting['provider_name'] == Provider.DOUBAO:
+                doubao_api_key = gr.Textbox(
+                    value=provider_config['api_key'],
+                    label='Doubao API Key',
+                    lines=1,
+                    placeholder='Enter your Doubao API key here',
+                    interactive=True,
+                    scale=10,
+                    type='password'
+                )
+                main_endpoint_id = gr.Textbox(
+                    value=provider_config['main_endpoint_id'],
+                    label='Main Endpoint ID',
+                    lines=1,
+                    placeholder='Enter your main endpoint ID here',
+                    interactive=True,
+                    scale=10,
+                    type='password'
+                )
+                sub_endpoint_id = gr.Textbox(
+                    value=provider_config['sub_endpoint_id'],
+                    label='Sub Endpoint ID',
+                    lines=1,
+                    placeholder='Enter your sub endpoint ID here',
+                    interactive=True,
+                    scale=10,
+                    type='password'
+                )
+
+            elif setting['provider_name'] in [Provider.GPT, Provider.OTHERS]:  # GPT and OTHERS share the same two inputs
+                gpt_api_key = gr.Textbox(
+                    value=provider_config['api_key'],
+                    label='OpenAI API Key',
+                    lines=1,
+                    placeholder='Enter your OpenAI API key here',
+                    interactive=True,
+                    scale=10,
+                    type='password'
+                )
+                base_url = gr.Textbox(
+                    value=provider_config['base_url'],
+                    label='API Base URL',
+                    lines=1,
+                    placeholder='Enter API base URL here',
+                    interactive=True,
+                    scale=10,
+                    type='password'
+                )
+
+            elif setting['provider_name'] == Provider.ZHIPUAI:
+                zhipuai_api_key = gr.Textbox(
+                    value=provider_config['api_key'],
+                    label='ZhipuAI API Key',
+                    lines=1,
+                    placeholder='Enter your ZhipuAI API key here',
+                    interactive=True,
+                    scale=10,
+                    type='password'
+                )
+        # Row 3: per-provider on_submit handler, its change-event wiring, and the optional API test button.
+        with gr.Row():
+            if setting['provider_name'] == Provider.WENXIN:
+                def on_submit(main_model, sub_model, baidu_access_key, baidu_secret_key):  # stores the keys and rebuilds both ModelConfig entries in place
+                    provider_config['ak'] = baidu_access_key
+                    provider_config['sk'] = baidu_secret_key
+
+                    setting['model'] = ModelConfig(
+                        model=main_model,
+                        ak=baidu_access_key,
+                        sk=baidu_secret_key,
+                        max_tokens=4096
+                    ) 
+                    setting['sub_model'] = ModelConfig(
+                        model=sub_model,
+                        ak=baidu_access_key,
+                        sk=baidu_secret_key,
+                        max_tokens=4096
+                    )
+
+                submit_event = dict(  # shared kwargs for the .change() registrations below; no outputs are declared
+                    fn=on_submit,
+                    inputs=[main_model, sub_model, baidu_access_key, baidu_secret_key],
+                )
+                # Call once at render time so setting['model'] / setting['sub_model'] reflect the widgets' initial values.
+                on_submit(main_model.value, sub_model.value, baidu_access_key.value, baidu_secret_key.value)
+
+                main_model.change(**submit_event)
+                sub_model.change(**submit_event)
+                baidu_access_key.change(**submit_event)
+                baidu_secret_key.change(**submit_event)
+            
+            elif setting['provider_name'] == Provider.DOUBAO:
+                def on_submit(main_model, sub_model, doubao_api_key, main_endpoint_id, sub_endpoint_id):  # main and sub model get separate endpoint ids
+                    provider_config['api_key'] = doubao_api_key
+                    provider_config['main_endpoint_id'] = main_endpoint_id
+                    provider_config['sub_endpoint_id'] = sub_endpoint_id
+                            
+                    setting['model'] = ModelConfig(
+                        model=main_model,
+                        api_key=doubao_api_key,
+                        endpoint_id=main_endpoint_id,
+                        max_tokens=4096
+                    )
+                    setting['sub_model'] = ModelConfig(
+                        model=sub_model,
+                        api_key=doubao_api_key,
+                        endpoint_id=sub_endpoint_id,
+                        max_tokens=4096
+                    )
+                
+                submit_event = dict(    
+                    fn=on_submit,
+                    inputs=[main_model, sub_model, doubao_api_key, main_endpoint_id, sub_endpoint_id],
+                )
+                # Initial sync at render time, using the Doubao widgets' starting values.
+                on_submit(main_model.value, sub_model.value, doubao_api_key.value, main_endpoint_id.value, sub_endpoint_id.value)
+
+                main_model.change(**submit_event)
+                sub_model.change(**submit_event)
+                doubao_api_key.change(**submit_event)
+                main_endpoint_id.change(**submit_event)
+                sub_endpoint_id.change(**submit_event)
+
+            elif setting['provider_name'] in [Provider.GPT, Provider.OTHERS]:
+                def on_submit(main_model, sub_model, gpt_api_key, base_url):
+                    provider_config['api_key'] = gpt_api_key
+                    provider_config['base_url'] = base_url.strip()  # surrounding whitespace is removed from the URL
+                    
+                    setting['model'] = ModelConfig(
+                        model=main_model,
+                        api_key=provider_config['api_key'],
+                        base_url=provider_config['base_url'],
+                        max_tokens=4096,
+                        proxies=provider_config.get('proxies', None),  # read from the config dict; no widget in this function edits it
+                    )
+                    setting['sub_model'] = ModelConfig(
+                        model=sub_model,
+                        api_key=provider_config['api_key'],
+                        base_url=provider_config['base_url'],
+                        max_tokens=4096,
+                        proxies=provider_config.get('proxies', None),
+                    )
+                
+                submit_event = dict(
+                    fn=on_submit,
+                    inputs=[main_model, sub_model, gpt_api_key, base_url],
+                )
+                # Initial sync at render time, using the API key / base URL widgets' starting values.
+                on_submit(main_model.value, sub_model.value, gpt_api_key.value, base_url.value)
+
+                main_model.change(**submit_event)
+                sub_model.change(**submit_event)
+                gpt_api_key.change(**submit_event)
+                base_url.change(**submit_event)
+
+            elif setting['provider_name'] == Provider.ZHIPUAI:
+                def on_submit(main_model, sub_model, zhipuai_api_key):
+                    provider_config['api_key'] = zhipuai_api_key
+                    
+                    setting['model'] = ModelConfig(
+                        model=main_model,
+                        api_key=zhipuai_api_key,
+                        max_tokens=4096
+                    )
+                    setting['sub_model'] = ModelConfig(
+                        model=sub_model,
+                        api_key=zhipuai_api_key,
+                        max_tokens=4096
+                    )
+                
+                submit_event = dict(
+                    fn=on_submit,
+                    inputs=[main_model, sub_model, zhipuai_api_key],
+                )
+                # Initial sync at render time, using the ZhipuAI widgets' starting values.
+                on_submit(main_model.value, sub_model.value, zhipuai_api_key.value)
+
+                main_model.change(**submit_event)
+                sub_model.change(**submit_event)
+                zhipuai_api_key.change(**submit_event)
+            # The test button and its report box are created only when RENDER_SETTING_API_TEST_BTN is truthy.
+            if RENDER_SETTING_API_TEST_BTN:
+                test_btn = gr.Button("测试")
+                test_report = gr.Textbox(show_label=False, container=False, value='', interactive=False, scale=10)
+            # Generator handler for the test button: forwards what test_stream_chat yields, then a final report (or the error text).
+            def on_test_llm_api():
+                if not setting['model']['model'].strip():
+                    return gr.Info('主模型名不能为空')  # calls gr.Info with the message, then ends the generator without yielding
+                
+                if not setting['sub_model']['model'].strip():
+                    return gr.Info('辅助模型名不能为空')
+
+                try:
+                    response1 = yield from test_stream_chat(setting['model'])  # main model first, then the sub model
+                    response2 = yield from test_stream_chat(setting['sub_model'])
+                    report_text = f"User:1+1=?\n主模型 ：{response1.response}({response1.cost_info})\n辅助模型：{response2.response}({response2.cost_info})\n测试通过！"
+                    yield report_text
+                except Exception as e:  # any failure is reported in the textbox instead of being raised
+                    yield f"测试失败：{str(e)}"
+            
+            if RENDER_SETTING_API_TEST_BTN:
+                test_btn.click(
+                    on_test_llm_api,
+                    outputs=[test_report]
+                )
\ No newline at end of file
diff --git a/core/frontend_utils.py b/core/frontend_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d04f16b01ec0dd330cc555a08d17fb56eeff2e3
--- /dev/null
+++ b/core/frontend_utils.py
@@ -0,0 +1,333 @@
+import copy
+import functools
+import pickle
+import os
+import time
+import gradio as gr
+
+from core.writer import Chunk
+# HTML banner markup: application name with version number, plus a one-line tagline.
+title = """
+<div style="text-align: center; padding: 10px 20px;">
+    <h1 style="margin: 0 0 5px 0;">🖋️ Long-Novel-GPT 1.10</h1>
+    <p style="margin: 0;"><em>AI一键生成长篇小说</em></p>
+</div>
+"""
+# Numbered usage notes for the demo (Markdown; user-facing Chinese text).
+info = \
+"""1. 当前Demo支持GPT、Claude、文心、豆包、GLM等模型，并且已经配置了API-Key，默认模型为GPT4o，最大线程数为5。
+2. 可以选中**示例**中的任意一个创意，然后点击**创作大纲**来初始化大纲。
+3. 初始化后，点击**开始创作**按钮，可以不断创作大纲，直到满意为止。
+4. 创建完大纲后，点击**创作剧情**按钮，之后重复以上流程。
+5. 选中**一键生成**后，再次点击左侧按钮可以一键生成。
+6. 如果遇到任何无法解决的问题，请点击**刷新**按钮。
+7. 如果问题还是无法解决，请刷新浏览器页面，这会导致丢失所有数据，请手动备份重要文本。
+"""
+
+
+def init_writer(idea, check_empty=True):  
+    outline_w = dict(
+        current_cost=0,
+        total_cost=0,
+        currency_symbol='￥',
+        xy_pairs=[(idea, '')],
+        apply_chunks={},
+    )
+    chapters_w = dict(
+        current_cost=0,
+        total_cost=0,
+        currency_symbol='￥',
+        xy_pairs=[('', '')],
+        apply_chunks={},
+    )
+    draft_w = dict(
+        current_cost=0,
+        total_cost=0,
+        currency_symbol='￥',
+        xy_pairs=[('', '')],
+        apply_chunks={},
+    )
+    suggestions = dict(
+        outline_w = ['新建大纲', '扩写大纲', '润色大纲'],
+        chapters_w = ['新建剧情', '扩写剧情', '润色剧情'],
+        draft_w = ['新建正文', '扩写正文', '润色正文'],
+    )
+
+    suggestions_dirname = dict(
+        outline_w = 'prompts/创作大纲',
+        chapters_w = 'prompts/创作剧情',
+        draft_w = 'prompts/创作正文',
+    )
+
+    chunk_length = dict(
+        outline_w = [4_000, ],
+        chapters_w = [500, 200, 1000, 2000],
+        draft_w = [1000, 500, 2000, 3000],
+    )
+
+    writer = dict(
+        current_w='outline_w',
+        outline_w=outline_w,
+        chapters_w=chapters_w,
+        draft_w=draft_w,
+        running_flag=False,
+        cancel_flag=False,  # 用于取消正在进行的操作
+        pause_flag=False,   # 用于暂停操作
+        progress={},
+        prompt_outputs=[],  # 这一行未注释时，将在gradio界面中显示prompt_outputs
+        suggestions=suggestions,
+        suggestions_dirname=suggestions_dirname,
+        pause_on_prompt_finished_flag = False,
+        quote_span = None,
+        chunk_length = chunk_length,
+    )
+
+    current_w_name = writer['current_w']
+    if check_empty and writer_x_is_empty(writer, current_w_name):
+        raise Exception('请先输入小说简介！')
+    else:
+        return writer
+
+def init_chapters_w(writer, check_empty=True):
+    outline_w = writer['outline_w']
+    chapters_w = writer['chapters_w']
+    outline_y = "".join([e[1] for e in outline_w['xy_pairs']])
+    chapters_w['xy_pairs'] = [(outline_y, '')]
+
+    writer["current_w"] = "chapters_w"
+    
+    current_w_name = writer['current_w']
+    if check_empty and writer_x_is_empty(writer, current_w_name):
+        raise Exception('大纲不能为空')
+    else:
+        return writer
+
+def init_draft_w(writer, check_empty=True):
+    chapters_w = writer['chapters_w']
+    draft_w = writer['draft_w']
+    chapters_y = "".join([e[1] for e in chapters_w['xy_pairs']])
+    draft_w['xy_pairs'] = [(chapters_y, '')]
+
+    writer["current_w"] = "draft_w"
+    
+    current_w_name = writer['current_w']
+    if check_empty and writer_x_is_empty(writer, current_w_name):
+        raise Exception('剧情不能为空')
+    else:
+        return writer
+
+# 在将writer传递到backend之前，只传递backend需要的部分
+# 这样从backend返回new_writer后，可以直接用update更新writer_state
+def process_writer_to_backend(writer):
+    remained_keys = ['current_w', 'outline_w', 'chapters_w', 'draft_w', 'quote_span']
+    new_writer = {key: writer[key] for key in remained_keys}
+    return copy.deepcopy(new_writer)
+
+# 在整个writer_state生命周期中，其对象地址都不应被改变，这样方便各种flag的检查
+def process_writer_from_backend(writer, new_writer):
+    for key in ['outline_w', 'chapters_w', 'draft_w']:
+        writer[key] = copy.deepcopy(new_writer[key])
+    return writer
+
+def is_running(writer):
+    # 只检查是否有正在运行的操作
+    return writer['running_flag'] and not writer['cancel_flag']
+
+def has_accept(writer):
+    # 只检查是否有待接受的文本
+    current_w = writer[writer['current_w']]
+    return bool(current_w['apply_chunks'])
+
+def cancellable(func):
+    @functools.wraps(func)
+    def wrapper(writer, *args, **kwargs):
+        if is_running(writer):
+            gr.Warning('另一个操作正在进行中，请等待其完成或取消！')
+            return
+        
+        if has_accept(writer) and wrapper.__name__ != "on_accept_write":
+            gr.Warning('有正在等待接受的文本，点击接受或取消！')
+            return
+        
+        writer['running_flag'] = True
+        writer['cancel_flag'] = False
+        writer['pause_flag'] = False
+        
+        generator = func(writer, *args, **kwargs)
+        result = None
+        try:
+            while True:   
+                if writer['cancel_flag']:
+                    gr.Info('操作已取消！')
+                    return
+                
+                # pause 暂停逻辑由func内部实现，便于它们在暂停前后执行一些操作              
+                try:
+                    result = next(generator)
+                    if isinstance(result, tuple) and (writer_dict := next((item for item in result if isinstance(item, dict) and 'running_flag' in item), None)):
+                        assert writer is writer_dict, 'writer对象地址发生了改变'
+                        writer = writer_dict
+                    yield result
+                except StopIteration as e:
+                    return e.value
+                except Exception as e:
+                    raise gr.Error(f'操作过程中发生错误：{e}')
+        finally:
+            writer['running_flag'] = False
+            writer['pause_flag'] = False
+    
+    return wrapper
+
+def try_cancel(writer):
+    if not (is_running(writer) or has_accept(writer)):
+        gr.Info('当前没有正在进行的操作或待接受的文本')
+        return
+    
+    writer['prompt_outputs'] = []
+    current_w = writer[writer['current_w']]
+    if not is_running(writer) and has_accept(writer):    # 优先取消待接受的文本
+        current_w['apply_chunks'].clear()
+        gr.Info('已取消待接受的文本')
+        return
+
+    writer['cancel_flag'] = True
+    
+    start_time = time.time()
+    while writer['running_flag'] and time.time() - start_time < 3:
+        time.sleep(0.1)
+    
+    if writer['running_flag']:
+        gr.Warning('取消操作超时，可能需要刷新页面')
+    
+    writer['cancel_flag'] = False
+    
+def writer_y_is_empty(writer, w_name):
+    xy_pairs = writer[w_name]['xy_pairs']
+    return sum(len(e[1]) for e in xy_pairs) == 0
+
+def writer_x_is_empty(writer, w_name):
+    xy_pairs = writer[w_name]['xy_pairs']
+    return sum(len(e[0]) for e in xy_pairs) == 0
+
+
+# create a markdown table
+# TODO: 优化显示逻辑，字少的列宽度小，字多的列宽度大
+def create_comparison_table(pairs, column_names=['Original Text', 'Enhanced Text', 'Enhanced Text 2']):
+    # Check if any pair has 3 elements
+    has_third_column = any(len(pair) == 3 for pair in pairs)
+    
+    # Create table header
+    if has_third_column:
+        table = f"| {column_names[0]} | {column_names[1]} | {column_names[2]} |\n|---------------|-----------------|----------------|\n"
+    else:
+        table = f"| {column_names[0]} | {column_names[1]} |\n|---------------|---------------|\n"
+    
+    # Add rows to the table
+    for pair in pairs:
+        x = pair[0].replace('|', '\\|').replace('\n', '<br>')
+        y1 = pair[1].replace('|', '\\|').replace('\n', '<br>')
+        
+        if has_third_column:
+            y2 = pair[2].replace('|', '\\|').replace('\n', '<br>') if len(pair) == 3 else ''
+            table += f"| {x} | {y1} | {y2} |\n"
+        else:
+            table += f"| {x} | {y1} |\n"
+    
+    return table
+
+def messages2chatbot(messages):
+    if len(messages) and messages[0]['role'] == 'system':
+        return [{'role': 'user', 'content': messages[0]['content']}, ] + messages[1:]
+    else:
+        return messages
+    
+def create_progress_md(writer):
+    progress_md = ""
+    if 'progress' in writer and writer['progress']:
+        progress = writer['progress']
+        progress_md = ""
+        
+        # 使用集合来去重并保持顺序
+        titles = []
+        subtitles = {}
+        current_op_ij = (float('inf'), float('inf'))
+        for opi, op in enumerate(progress['ops']):
+            if op['title'] not in titles:
+                titles.append(op['title'])
+            if op['title'] not in subtitles:
+                subtitles[op['title']] = []
+            if op['subtitle'] not in subtitles[op['title']]:
+                subtitles[op['title']].append(op['subtitle'])
+            
+            if opi == progress['cur_op_i']:
+                current_op_ij = (len(titles), len(subtitles[op['title']]))
+        
+        for i, title in enumerate(titles, 1):
+            progress_md += f"## {['一', '二', '三', '四', '五', '六', '七', '八', '九', '十'][i-1]}、{title}\n"
+            for j, subtitle in enumerate(subtitles[title], 1):
+                if i < current_op_ij[0] or (i == current_op_ij[0] and j < current_op_ij[1]):
+                    progress_md += f"### {j}、{subtitle} ✓\n"
+                elif i == current_op_ij[0] and j == current_op_ij[1]:
+                    progress_md += f"### {j}、{subtitle} {'.' * (int(time.time()) % 4)}\n"
+                else:
+                    progress_md += f"### {j}、{subtitle}\n"
+            
+            progress_md += "\n"
+        
+        progress_md += "---\n"
+        # TODO: 考虑只放当前进度
+
+    return gr.Markdown(progress_md)
+
+# Render the current stage's xy_pairs (plus any pending apply_chunks) as a Markdown comparison table.
+def create_text_md(writer):  # returns a gr.Markdown whose height depends on the length of the generated text
+    current_w_name = writer['current_w']
+    current_w = writer[current_w_name]
+    apply_chunks = current_w['apply_chunks']
+    # Column headers depend on the stage; the third header is needed only while rewrites are pending.
+    match current_w_name:
+        case 'draft_w':
+            column_names = ['剧情', '正文', '修正稿']
+        case 'outline_w':
+            column_names = ['小说简介', '大纲', '修正稿']
+        case 'chapters_w':
+            column_names = ['大纲', '剧情', '修正稿']
+        case _:
+            raise Exception('当前状态不正确')
+
+    xy_pairs = current_w['xy_pairs']
+    if apply_chunks:  # pending rewrites: show them in an extra third column
+        table = [[*e, ''] for e in xy_pairs]
+        occupied_rows = [False] * len(table)  # marks rows already claimed by a chunk
+        for chunk, key, text in apply_chunks:  # each entry is unpacked as a (chunk, key, text) triple
+            if not isinstance(chunk, Chunk):
+                chunk = Chunk(**chunk)  # anything that is not a Chunk is treated as constructor kwargs
+            assert key == 'y_chunk'
+            pair_span = chunk.text_source_slice  # used below as a slice over the table rows
+            if any(occupied_rows[pair_span]):
+                raise Exception('apply_chunks中存在重叠的pair_span')
+            occupied_rows[pair_span] = [True] * (pair_span.stop - pair_span.start)
+            table[pair_span] = [[chunk.x_chunk, chunk.y_chunk, text], ] + [None] * (pair_span.stop - pair_span.start - 1)  # first row holds the merged chunk, the rest become None
+        table = [e for e in table if e is not None]  # drop the None placeholders
+        if not any(e[1] for e in table):  # no existing y text anywhere: collapse to two columns
+            column_names = column_names[:2]
+            column_names[1] = column_names[1] + '（待接受）'
+            table = [[e[0], e[2]] for e in table]
+        md = create_comparison_table(table, column_names=column_names)
+    else:
+        if writer_x_is_empty(writer, current_w_name):  # nothing entered yet: show usage tips instead of text
+            tip_x = '从下方示例中选择一个创意用于创作小说。'
+            tip_y = '选择创意后，点击创作大纲。更详细的操作请参考使用指南。'
+            if not xy_pairs[0][0].strip():
+                xy_pairs = [[tip_x, tip_y]]
+            else:
+                xy_pairs = [[xy_pairs[0][0], tip_y]]
+
+        md = create_comparison_table(xy_pairs, column_names=column_names[:2])
+    # Tables shorter than 400 characters get a 200px view, everything else 600px.
+    if len(md) < 400:
+        height = '200px'
+    else:
+        height = '600px'
+    return gr.Markdown(md, height=height)
+
diff --git a/core/outline_writer.py b/core/outline_writer.py
new file mode 100644
index 0000000000000000000000000000000000000000..834e67245443974ea2aac5812929c382d0e6ee0c
--- /dev/null
+++ b/core/outline_writer.py
@@ -0,0 +1,88 @@
+from core.parser_utils import parse_chapters
+from core.writer_utils import KeyPointMsg
+from core.writer import Writer
+
+from prompts.创作章节.prompt import main as prompt_outline
+from prompts.提炼.prompt import main as prompt_summary
+
+class OutlineWriter(Writer):
+    def __init__(self, xy_pairs, global_context, model=None, sub_model=None, x_chunk_length=2_000, y_chunk_length=2_000, max_thread_num=5):
+        super().__init__(xy_pairs, global_context, model, sub_model, x_chunk_length=x_chunk_length, y_chunk_length=y_chunk_length, max_thread_num=max_thread_num)
+
+    def write(self, user_prompt, pair_span=None):
+        target_chunk = self.get_chunk(pair_span=pair_span)
+
+        if not self.global_context.get("summary", ''):
+            raise Exception("需要提供小说简介。")
+        
+        if not target_chunk.y_chunk.strip():
+            if not self.y.strip():
+                chunks = [target_chunk, ]
+            else:
+                raise Exception("选中进行创作的内容不能为空，考虑随便填写一些占位的字。")
+        else:
+            chunks = self.get_chunks(pair_span)
+
+        new_chunks = yield from self.batch_yield(
+            [self.write_text(e, prompt_outline, user_prompt) for e in chunks], 
+            chunks, prompt_name='创作文本')
+        
+        results = yield from self.batch_split_chapters(new_chunks)
+
+        new_chunks2 = [e[0] for e in results]
+
+        self.apply_chunks(chunks, new_chunks2)
+
+    def split_chapters(self, chunk):
+        if False: yield # 将此函数变为生成器函数
+
+        assert chunk.x_chunk == '', 'chunk.x_chunk不为空'
+        chapter_titles, chapter_contents = parse_chapters(chunk.y_chunk)
+        new_xy_pairs = self.construct_xy_pairs(chapter_titles, chapter_contents)
+        
+        return chunk.edit(text_pairs=new_xy_pairs), True, ''
+    
+    def construct_xy_pairs(self, chapter_titles, chapter_contents):
+        return [('', f"{title[0]} {title[1]}\n{content}") for title, content in zip(chapter_titles, chapter_contents)]
+    
+    def batch_split_chapters(self, chunks):
+        results = yield from self.batch_yield(
+            [self.split_chapters(e) for e in chunks], chunks, prompt_name='划分章节')
+        return results
+    
+    def summary(self):
+        target_chunk = self.get_chunk(pair_span=(0, len(self.xy_pairs)))
+        if not target_chunk.y_chunk:
+            raise Exception("没有章节需要总结。")
+        if len(target_chunk.y_chunk) <= 5:
+            raise Exception("需要总结的章节不能少于5个字。")
+        
+        if len(target_chunk.y_chunk) > 2000:
+            y = self._truncate_chunk(target_chunk.y_chunk)
+        else:
+            y = target_chunk.y_chunk
+    
+        result = yield from prompt_summary(self.model, "提炼大纲", y=y)
+
+        self.global_context['outline'] = result['text']
+
+    def get_model(self):
+        return self.model
+
+    def get_sub_model(self):
+        return self.sub_model
+
+    def _truncate_chunk(self, text, chunk_size=100, keep_chunks=20):
+        """Truncate chunk content by keeping evenly spaced sections"""
+        if len(text) <= 2000:
+            return text
+        
+        # Split into chunks of chunk_size
+        chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
+        
+        # Select evenly spaced chunks
+        step = len(chunks) // keep_chunks
+        selected_chunks = chunks[::step][:keep_chunks]
+        new_content = '...'.join(selected_chunks)
+        return new_content
+        
\ No newline at end of file
diff --git a/core/parser_utils.py b/core/parser_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..61f3ae15ed2a10451da350bf5ef4e9229940d35a
--- /dev/null
+++ b/core/parser_utils.py
@@ -0,0 +1,32 @@
+import re
+
+
+def parse_chapters(content):
+    # Single pattern to capture: full chapter number (第X章), title, and content
+    pattern = r'(第[零一二三四五六七八九十百千万亿0123456789.-]+章)([^\n]*)\n*([\s\S]*?)(?=第[零一二三四五六七八九十百千万亿0123456789.-]+章|$)'
+    matches = re.findall(pattern, content)
+    
+    # Unpack directly into separate lists using zip
+    chapter_titles, title_names, chapter_contents = zip(*[
+        (index, name.strip(), content.strip())
+        for index, name, content in matches
+    ]) if matches else ([], [], [])
+    
+    return list(zip(chapter_titles, title_names)), list(chapter_contents)
+
+
+if __name__ == "__main__":
+    test = """
+    第1-1章 出世
+    主角张小凡出身贫寒，因天赋异禀被青云门收为弟子，开始修仙之路。
+
+    第2.1章 初入青云
+
+    张小凡在青云门中结识师兄弟，学习基础法术，逐渐适应修仙生活。
+
+    第3章 灵气初现
+    张小凡在一次意外中感受到天地灵气，修为有所提升。
+    """
+
+    results = parse_chapters(test)
+    print()
diff --git a/core/plot_writer.py b/core/plot_writer.py
new file mode 100644
index 0000000000000000000000000000000000000000..01f8d8c940b5d907c83837ac4efc8fb4bc81ecf6
--- /dev/null
+++ b/core/plot_writer.py
@@ -0,0 +1,49 @@
+from core.writer_utils import KeyPointMsg
+from core.writer import Writer
+
+from prompts.创作剧情.prompt import main as prompt_plot
+from prompts.提炼.prompt import main as prompt_summary
+
+class PlotWriter(Writer):
+    """Writer specialisation that creates plot text and summarises plot into a chapter.
+
+    ``write`` generates the y side of the selected pairs with the plot prompt,
+    using ``global_context['chapter']`` as required input; ``summary`` runs
+    the summary prompt over all y text and stores the result in
+    ``global_context['chapter']``.
+    """
+
+    def __init__(self, xy_pairs, global_context, model=None, sub_model=None, x_chunk_length=200, y_chunk_length=1000, max_thread_num=5):
+        super().__init__(xy_pairs, global_context, model, sub_model, x_chunk_length=x_chunk_length, y_chunk_length=y_chunk_length, max_thread_num=max_thread_num)
+
+    def write(self, user_prompt, pair_span=None):
+        """Generator: (re)write the plot for ``pair_span`` and apply it to ``xy_pairs``.
+
+        Args:
+            user_prompt: User instruction forwarded to the plot prompt.
+            pair_span: ``(start, end)`` pair indices to write; ``None`` means
+                all pairs.
+
+        Yields:
+            Whatever ``Writer.batch_write_apply_text`` yields (progress values
+            and key-point messages).
+
+        Raises:
+            ValueError: If no chapter is present in ``global_context``, or the
+                selected text is blank while other y text exists.
+        """
+        # get_chunk() rejects pair_span=None, so resolve the default here;
+        # otherwise calling write() without a span always failed.
+        if pair_span is None:
+            pair_span = (0, len(self.xy_pairs))
+
+        target_chunk = self.get_chunk(pair_span=pair_span)
+
+        if not self.global_context.get("chapter", ''):
+            raise ValueError("需要提供章节内容。")
+
+        if target_chunk.y_chunk.strip():
+            chunks = self.get_chunks(pair_span)
+        elif not self.y.strip():
+            # Nothing has been written anywhere yet: create from scratch in one chunk.
+            chunks = [target_chunk, ]
+        else:
+            raise ValueError("选中进行创作的内容不能为空，考虑随便填写一些占位的字。")
+
+        # Same write -> map -> apply pipeline as the base class helper.
+        yield from self.batch_write_apply_text(chunks, prompt_plot, user_prompt)
+
+    def summary(self):
+        """Generator: condense all plot text into ``global_context['chapter']``.
+
+        Yields:
+            Whatever the summary prompt yields.
+
+        Raises:
+            ValueError: If there is no plot text, or it is shorter than 5
+                characters.
+        """
+        target_chunk = self.get_chunk(pair_span=(0, len(self.xy_pairs)))
+        if not target_chunk.y_chunk:
+            raise ValueError("没有剧情需要总结。")
+        # The message says "not fewer than 5 characters", so exactly 5 is accepted.
+        if len(target_chunk.y_chunk) < 5:
+            raise ValueError("需要总结的剧情不能少于5个字。")
+
+        result = yield from prompt_summary(self.model, "提炼章节", y=target_chunk.y_chunk)
+
+        self.global_context['chapter'] = result['text']
+
+    def get_model(self):
+        """Return the main model (``self.model``)."""
+        return self.model
+
+    def get_sub_model(self):
+        """Return the sub-model (``self.sub_model``)."""
+        return self.sub_model
diff --git a/core/summary_novel.py b/core/summary_novel.py
new file mode 100644
index 0000000000000000000000000000000000000000..15795cff84efc8c0348bcf782756a335ac660cf9
--- /dev/null
+++ b/core/summary_novel.py
@@ -0,0 +1,94 @@
+import numpy as np
+from core.draft_writer import DraftWriter
+from core.plot_writer import PlotWriter
+from core.outline_writer import OutlineWriter
+from core.writer_utils import KeyPointMsg
+
+
+
+def summary_draft(model, sub_model, chapter_title, chapter_text):
+    """Generator: summarise one chapter's text with a ``DraftWriter``.
+
+    Builds a single ``('', chapter_text)`` pair, runs ``DraftWriter.summary``
+    over it and converts every chunk-list it yields into a progress dict.
+    ``KeyPointMsg`` items are not forwarded; only their ``prompt_name`` is
+    remembered and shown in later progress messages.
+
+    Yields:
+        dict with keys ``progress_msg``, ``chars_num``, ``current_cost``,
+        ``currency_symbol`` and ``model``, aggregated over the chunk list.
+
+    Returns:
+        The ``DraftWriter`` instance (as the generator's return value).
+    """
+    xy_pairs = [('', chapter_text)]
+
+    dw = DraftWriter(xy_pairs, {}, model=model, sub_model=sub_model, x_chunk_length=500, y_chunk_length=1000)
+    dw.max_thread_num = 1   # each chapter is processed with a single thread
+
+    kp_msg_title = ''
+    for item in dw.summary(pair_span=(0, len(xy_pairs))):
+        if isinstance(item, KeyPointMsg):
+            # To support saving at key points, an edit-level change would have
+            # to be computed and the writer yielded here.
+            kp_msg_title = item.prompt_name
+            continue
+
+        chunk_list = item
+
+        current_cost = 0
+        currency_symbol = ''
+        finished_chunk_num = 0
+        chars_num = 0
+        used_model = None
+        for entry in chunk_list:
+            if entry is None:
+                continue
+            finished_chunk_num += 1
+            output, _chunk = entry
+            if output is None:
+                # map_text case: the generator stopped on its very first next()
+                continue
+            response_msgs = output['response_msgs']
+            current_cost += response_msgs.cost
+            currency_symbol = response_msgs.currency_symbol
+            chars_num += len(response_msgs.response)
+            used_model = response_msgs.model
+
+        yield {
+            'progress_msg': f"[{chapter_title}] 提炼章节剧情 {kp_msg_title} 进度：{finished_chunk_num}/{len(chunk_list)}  已创作字符：{chars_num}  已花费：{current_cost:.4f}{currency_symbol}",
+            'chars_num': chars_num,
+            'current_cost': current_cost,
+            'currency_symbol': currency_symbol,
+            'model': used_model,
+        }
+
+    return dw
+
+
+def summary_plot(model, sub_model, chapter_title, chapter_plot):
+    """Generator: summarise one chapter's plot with a ``PlotWriter``.
+
+    Wraps ``chapter_plot`` in a single ``('', chapter_plot)`` pair, runs
+    ``PlotWriter.summary`` and turns each yielded output into a progress dict.
+    Each output is indexed with ``['response_msgs']`` and that object's
+    ``cost``, ``currency_symbol``, ``response`` and ``model`` attributes are
+    read.
+
+    Yields:
+        dict with keys ``progress_msg``, ``chars_num``, ``current_cost``,
+        ``currency_symbol`` and ``model``.
+
+    Returns:
+        The ``PlotWriter`` instance (as the generator's return value).
+    """
+    pw = PlotWriter([('', chapter_plot)], {}, model=model, sub_model=sub_model, x_chunk_length=500, y_chunk_length=1000)
+
+    for output in pw.summary():
+        response_msgs = output['response_msgs']
+        current_cost = response_msgs.cost
+        currency_symbol = response_msgs.currency_symbol
+        chars_num = len(response_msgs.response)
+        yield {
+            'progress_msg': f"[{chapter_title}] 提炼章节大纲 已创作字符：{chars_num}  已花费：{current_cost:.4f}{currency_symbol}",
+            'chars_num': chars_num,
+            'current_cost': current_cost,
+            'currency_symbol': currency_symbol,
+            'model': response_msgs.model,
+        }
+
+    return pw
+
+def summary_chapters(model, sub_model, title, chapter_titles, chapter_content):
+    """Generator: summarise all chapters into a book outline with an ``OutlineWriter``.
+
+    The writer is created with a placeholder pair, then its ``xy_pairs`` are
+    replaced by ``construct_xy_pairs(chapter_titles, chapter_content)`` before
+    ``summary()`` is run. Each yielded output is turned into a progress dict.
+
+    Yields:
+        dict with keys ``progress_msg``, ``chars_num``, ``current_cost``,
+        ``currency_symbol`` and ``model``.
+
+    Returns:
+        The ``OutlineWriter`` instance (as the generator's return value).
+    """
+    ow = OutlineWriter([('', '')], {}, model=model, sub_model=sub_model, x_chunk_length=500, y_chunk_length=1000)
+    ow.xy_pairs = ow.construct_xy_pairs(chapter_titles, chapter_content)
+
+    for output in ow.summary():
+        response_msgs = output['response_msgs']
+        current_cost = response_msgs.cost
+        currency_symbol = response_msgs.currency_symbol
+        chars_num = len(response_msgs.response)
+        yield {
+            'progress_msg': f"[{title}] 提炼全书大纲 已创作字符：{chars_num}  已花费：{current_cost:.4f}{currency_symbol}",
+            'chars_num': chars_num,
+            'current_cost': current_cost,
+            'currency_symbol': currency_symbol,
+            'model': response_msgs.model,
+        }
+
+    return ow
+
+
+    
\ No newline at end of file
diff --git a/core/writer.py b/core/writer.py
new file mode 100644
index 0000000000000000000000000000000000000000..e64c1acd548b8202292c34ed4a9bfc74b68dc482
--- /dev/null
+++ b/core/writer.py
@@ -0,0 +1,533 @@
+import re
+import numpy as np
+import bisect
+from dataclasses import asdict, dataclass
+
+from llm_api import ModelConfig
+from prompts.对齐剧情和正文 import prompt as match_plot_and_text
+from prompts.审阅.prompt import main as prompt_review
+from core.writer_utils import split_text_into_chunks, detect_max_edit_span, run_yield_func
+from core.writer_utils import KeyPointMsg
+from core.diff_utils import get_chunk_changes
+
+
+class Chunk(dict):
+    """A window of ``(x, y)`` pairs: the pairs being edited plus surrounding context.
+
+    The data lives in three dict entries so an instance can be rebuilt with
+    ``Chunk(**chunk_dict)``:
+
+    * ``chunk_pairs``: every pair in the window, context included.
+    * ``source_slice``: ``(start, stop)`` of the window within the owning pair list.
+    * ``text_slice``: ``(start, stop)`` of the edited pairs *within*
+      ``chunk_pairs``; ``stop`` is ``None`` or negative, i.e. counted from
+      the end of the window.
+    """
+
+    def __init__(self, chunk_pairs: tuple[tuple[str, str], ...], source_slice: tuple[int, int], text_slice: tuple[int, int]):
+        """Store the window; ``slice`` objects are accepted and normalised to tuples.
+
+        Raises:
+            AssertionError: If the end of ``text_slice`` is neither ``None``
+                nor negative.
+        """
+        super().__init__()
+        self['chunk_pairs'] = tuple(chunk_pairs)
+
+        if isinstance(source_slice, slice):
+            source_slice = (source_slice.start, source_slice.stop)
+        self['source_slice'] = source_slice
+
+        if isinstance(text_slice, slice):
+            text_slice = (text_slice.start, text_slice.stop)
+        assert text_slice[1] is None or text_slice[1] < 0, 'text_slice end must be None or negative'
+        self['text_slice'] = text_slice
+
+    def edit(self, x_chunk=None, y_chunk=None, text_pairs=None):
+        """Return a new ``Chunk`` whose edited pairs are replaced; context is kept.
+
+        Args:
+            x_chunk: New x text. The edited pairs collapse into one pair.
+            y_chunk: New y text. The edited pairs collapse into one pair.
+            text_pairs: Explicit replacement pairs; used only when neither
+                ``x_chunk`` nor ``y_chunk`` is given.
+
+        When only one of ``x_chunk`` / ``y_chunk`` is given, the other side
+        keeps its current concatenated text. When both are given, both are
+        applied (previously ``y_chunk`` was silently ignored).
+
+        Raises:
+            ValueError: If no replacement is provided at all.
+        """
+        if x_chunk is not None or y_chunk is not None:
+            new_x = self.x_chunk if x_chunk is None else x_chunk
+            new_y = self.y_chunk if y_chunk is None else y_chunk
+            text_pairs = [(new_x, new_y), ]
+        elif text_pairs is None:
+            raise ValueError("One of x_chunk, y_chunk or text_pairs must be provided")
+
+        chunk_pairs = list(self['chunk_pairs'])
+        # text_slice's stop is relative to the end of the window, so it remains
+        # valid even when the number of edited pairs changes.
+        chunk_pairs[self.text_slice] = list(text_pairs)
+
+        return Chunk(chunk_pairs=tuple(chunk_pairs), source_slice=self.source_slice, text_slice=self.text_slice)
+
+    @property
+    def source_slice(self) -> slice:
+        """Window position in the owning pair list, as a ``slice``."""
+        return slice(*self['source_slice'])
+
+    @property
+    def chunk_pairs(self) -> tuple[tuple[str, str], ...]:
+        """All pairs in the window, context included."""
+        return self['chunk_pairs']
+
+    @property
+    def text_slice(self) -> slice:
+        """Position of the edited pairs within ``chunk_pairs``, as a ``slice``."""
+        return slice(*self['text_slice'])
+
+    @property
+    def text_source_slice(self) -> slice:
+        """Position of the edited pairs in the owning pair list."""
+        # A missing start/stop in text_slice means "no offset" from the window edge.
+        source_start = self.source_slice.start + (self.text_slice.start or 0)
+        source_stop = self.source_slice.stop + (self.text_slice.stop or 0)
+        return slice(source_start, source_stop)
+
+    @property
+    def text_pairs(self) -> tuple[tuple[str, str], ...]:
+        """The edited pairs, without context."""
+        return self.chunk_pairs[self.text_slice]
+
+    @property
+    def x_chunk(self) -> str:
+        """Concatenated x text of the edited pairs."""
+        return ''.join(pair[0] for pair in self.text_pairs)
+
+    @property
+    def y_chunk(self) -> str:
+        """Concatenated y text of the edited pairs."""
+        return ''.join(pair[1] for pair in self.text_pairs)
+
+    @property
+    def x_chunk_len(self) -> int:
+        """Total x length of the edited pairs."""
+        return sum(len(pair[0]) for pair in self.text_pairs)
+
+    @property
+    def y_chunk_len(self) -> int:
+        """Total y length of the edited pairs."""
+        return sum(len(pair[1]) for pair in self.text_pairs)
+
+    @property
+    def x_chunk_context(self) -> str:
+        """Concatenated x text of the whole window, context included."""
+        return ''.join(pair[0] for pair in self.chunk_pairs)
+
+    @property
+    def y_chunk_context(self) -> str:
+        """Concatenated y text of the whole window, context included."""
+        return ''.join(pair[1] for pair in self.chunk_pairs)
+
+    @property
+    def x_chunk_context_len(self) -> int:
+        """Total x length of the whole window."""
+        return sum(len(pair[0]) for pair in self.chunk_pairs)
+
+    @property
+    def y_chunk_context_len(self) -> int:
+        """Total y length of the whole window."""
+        return sum(len(pair[1]) for pair in self.chunk_pairs)
+    
+    
+class Writer:
+    def __init__(self, xy_pairs, global_context=None, model:ModelConfig=None, sub_model:ModelConfig=None, x_chunk_length=1000, y_chunk_length=1000, max_thread_num=5):
+        """Set up a writer over a list of ``(x, y)`` text pairs.
+
+        Args:
+            xy_pairs: The pair list to operate on; it is stored by reference
+                and modified in place by ``apply_chunks``.
+            global_context: Dict of extra prompt inputs. A new dict is created
+                only when ``None`` is passed, so a caller-supplied dict (even an
+                empty one) is shared and sees later updates.
+            model: Model returned by ``get_model``.
+            sub_model: Model returned by ``get_sub_model``.
+            x_chunk_length: Target length of x per chunk.
+            y_chunk_length: Target length of y per chunk.
+            max_thread_num: Upper bound on generators advanced per round in
+                ``batch_yield``.
+        """
+        self.xy_pairs = xy_pairs
+        # `global_context or {}` would replace a caller's empty dict with a
+        # private one, hiding later writes from the caller.
+        self.global_context = {} if global_context is None else global_context
+
+        self.model = model
+        self.sub_model = sub_model
+
+        self.x_chunk_length = x_chunk_length
+        self.y_chunk_length = y_chunk_length
+
+        # x_chunk_length is the length of x fed into a single prompt call (controlled by the batch_map function); it affects the expansion ratio when mapping to y (i.e. LLM output window length / x_chunk_length).
+        # x_chunk_length also affects the chunk size used by map: the pair size is mainly determined by x_chunk_length (specifically by the update_map function, as x_chunk_length//2).
+        # y_chunk_length has less influence on the pair size (because the mapping is one-to-many).
+
+        self.max_thread_num = max_thread_num    # lets the thread count be controlled per Writer, useful when several Writers run at the same time
+    
+    @property
+    def x(self):    # TODO: consider the case where x is accessed frequently
+        """Concatenation of the x side of every pair."""
+        return ''.join(pair[0] for pair in self.xy_pairs)
+
+    @property
+    def y(self):
+        """Concatenation of the y side of every pair."""
+        return ''.join(pair[1] for pair in self.xy_pairs)
+    
+    @property
+    def x_len(self):
+        """Total character length of the x side of all pairs."""
+        return sum(len(pair[0]) for pair in self.xy_pairs)
+
+    @property
+    def y_len(self):
+        """Total character length of the y side of all pairs."""
+        return sum(len(pair[1]) for pair in self.xy_pairs)
+
+    def get_model(self):
+        """Return the main model (``self.model``)."""
+        return self.model
+
+    def get_sub_model(self):
+        """Return the sub-model (``self.sub_model``)."""
+        return self.sub_model
+    
+    def count_span_length(self, span):
+        """Return ``(x_length, y_length)`` summed over the pairs in ``span``.
+
+        ``span`` is a ``(start, end)`` pair-index range into ``self.xy_pairs``.
+        """
+        selected = self.xy_pairs[span[0]:span[1]]
+        x_total = sum(len(item[0]) for item in selected)
+        y_total = sum(len(item[1]) for item in selected)
+        return x_total, y_total
+
+    def align_span(self, x_span=None, y_span=None):
+        """Widen a character span in x (or in y) to whole-pair boundaries.
+
+        Exactly one of ``x_span`` / ``y_span`` must be given, as a
+        ``(start, end)`` character range into the concatenated x (or y) text.
+
+        Returns:
+            ``(aligned_span, pair_span)``: the character range widened to
+            pair boundaries, and the ``(start, end)`` indices into
+            ``self.xy_pairs`` covering exactly that range.
+
+        Raises:
+            ValueError: If neither or both spans are provided.
+            AssertionError: If the result fails the sanity checks below
+                (e.g. an empty input span).
+        """
+        if x_span is None and y_span is None:
+            raise ValueError("Either x_span or y_span must be provided")
+        
+        if x_span is not None and y_span is not None:
+            raise ValueError("Only one of x_span or y_span should be provided")
+        
+        is_x = x_span is not None
+        z_span = x_span if is_x else y_span
+        # cumsum_z[i] is the character offset at which pair i starts; the last
+        # entry is the total length.
+        cumsum_z = np.cumsum([0] + [len(pair[0 if is_x else 1]) for pair in self.xy_pairs]).tolist()
+        
+        l, r = z_span
+        # Last boundary <= l: the pair containing the start of the span.
+        start_chunk = bisect.bisect_right(cumsum_z, l) - 1
+        # First boundary >= r: the pair boundary at or after the end of the span.
+        end_chunk = bisect.bisect_left(cumsum_z, r)
+        
+        aligned_l = cumsum_z[start_chunk]
+        aligned_r = cumsum_z[end_chunk]
+        
+        aligned_span = (aligned_l, aligned_r)
+        pair_span = (start_chunk, end_chunk)
+        
+        # Add assertions to verify the correctness of the output
+        assert aligned_l <= l < aligned_r, "aligned_span does not properly contain the start of the input span"
+        assert aligned_l < r <= aligned_r, "aligned_span does not properly contain the end of the input span"
+        assert 0 <= start_chunk < end_chunk <= len(self.xy_pairs), "pair_span is out of bounds"
+        assert sum(len(pair[0 if is_x else 1]) for pair in self.xy_pairs[start_chunk:end_chunk]) == aligned_r - aligned_l, "aligned_span and pair_span do not match"
+
+        return aligned_span, pair_span
+    
+    def get_chunk(self, pair_span=None, x_span=None, y_span=None, context_length=0, smooth=True):
+        """Build a ``Chunk`` for a span, optionally padded with context pairs.
+
+        Exactly one of ``pair_span``, ``x_span`` or ``y_span`` must be given.
+
+        Args:
+            pair_span: ``(start, end)`` pair indices into ``self.xy_pairs``.
+            x_span: ``(start, end)`` character range in the concatenated x text.
+            y_span: ``(start, end)`` character range in the concatenated y text.
+            context_length: Context added on each side. Counted in pairs when
+                ``pair_span`` is used, and in characters (then aligned to pair
+                boundaries) when ``x_span`` / ``y_span`` is used.
+            smooth: Must be true for ``x_span`` / ``y_span``; the span is
+                aligned to pair boundaries via ``align_span``.
+
+        Returns:
+            A ``Chunk`` whose ``chunk_pairs`` hold the context window and whose
+            ``text_slice`` marks the requested pairs inside it.
+
+        Raises:
+            ValueError: If not exactly one span argument is provided.
+            AssertionError: If ``pair_span`` is out of bounds or ``smooth`` is
+                false for a character span.
+        """
+        if sum(x is not None for x in [pair_span, x_span, y_span]) != 1:
+            raise ValueError("Exactly one of pair_span, x_span, or y_span must be provided")
+        
+        assert pair_span is None or (pair_span[0] >= 0 and pair_span[1] <= len(self.xy_pairs)), "pair_span is out of bounds"
+
+        is_x = x_span is not None
+        is_pair = pair_span is not None
+
+        if is_pair:
+            # Context is measured in pairs and clamped to the list bounds.
+            context_pair_span = (
+                max(0, pair_span[0] - context_length),
+                min(len(self.xy_pairs), pair_span[1] + context_length)
+            )
+        else:
+            assert smooth, "smooth must be True"
+            span = x_span if is_x else y_span
+            if smooth:
+                span, pair_span = self.align_span(x_span=span if is_x else None, y_span=span if not is_x else None)
+
+            # Context is measured in characters, clamped to the text length,
+            # then widened to whole pairs.
+            context_span = (
+                max(0, span[0] - context_length),
+                min(self.x_len if is_x else self.y_len, span[1] + context_length)
+            )
+
+            context_span, context_pair_span = self.align_span(x_span=context_span if is_x else None, y_span=context_span if not is_x else None)
+
+        chunk_pairs = self.xy_pairs[context_pair_span[0]:context_pair_span[1]]
+        source_slice = context_pair_span
+        # Express the requested pairs relative to the window: start from the
+        # front, stop as a non-positive offset from the back.
+        text_slice = (pair_span[0] - context_pair_span[0], pair_span[1] - context_pair_span[1])
+        assert text_slice[1] <= 0, "text_slice end must be negative"
+        # An offset of 0 from the back must become None to be usable as a slice stop.
+        text_slice = (text_slice[0], None if text_slice[1] == 0 else text_slice[1])
+
+        return Chunk(
+            chunk_pairs=chunk_pairs,
+            source_slice=source_slice,
+            text_slice=text_slice
+        )
+    
+    def get_chunk_pair_span(self, chunk: Chunk):
+        """Locate the pairs in ``self.xy_pairs`` that a chunk's edited text corresponds to.
+
+        The position recorded in the chunk is tried first. If the text found
+        there no longer equals the chunk's text, the pair list is scanned for a
+        start and an end pair whose first/last 50 characters match.
+
+        Returns:
+            ``(pair_start, pair_end)`` indices into ``self.xy_pairs``.
+
+        Raises:
+            AssertionError: If the located pairs do not reproduce the chunk's
+                x and y text exactly.
+        """
+        pair_start, pair_end = chunk.text_source_slice.start, chunk.text_source_slice.stop
+        merged_x_chunk = ''.join(p[0] for p in self.xy_pairs[pair_start:pair_end])
+        merged_y_chunk = ''.join(p[1] for p in self.xy_pairs[pair_start:pair_end])
+        if merged_x_chunk == chunk.x_chunk and merged_y_chunk == chunk.y_chunk:
+            return pair_start, pair_end
+
+        # Fallback: search by content; defaults cover the whole list if no match is found.
+        pair_start, pair_end = 0, len(self.xy_pairs)
+        x_chunk, y_chunk = chunk.x_chunk, chunk.y_chunk
+        # First pair whose leading 50 chars are a prefix of the chunk's leading text.
+        for i, (x, y) in enumerate(self.xy_pairs):
+            if x_chunk[:50].startswith(x[:50]) and y_chunk[:50].startswith(y[:50]):
+                pair_start = i
+                break
+
+        # First pair at or after pair_start whose trailing 50 chars end the chunk's text.
+        for i in range(pair_start, len(self.xy_pairs)):
+            x, y = self.xy_pairs[i]
+            if x_chunk[-50:].endswith(x[-50:]) and y_chunk[-50:].endswith(y[-50:]):
+                pair_end = i + 1
+                break
+
+        # Verify the pair_span
+        merged_x_chunk = ''.join(p[0] for p in self.xy_pairs[pair_start:pair_end])
+        merged_y_chunk = ''.join(p[1] for p in self.xy_pairs[pair_start:pair_end])
+        assert x_chunk == merged_x_chunk and y_chunk == merged_y_chunk, "Chunk mismatch"
+
+        return (pair_start, pair_end)
+    
+    def apply_chunks(self, chunks: list[Chunk], new_chunks: list[Chunk]):
+        """Replace the pairs covered by each chunk with the pairs of its edited counterpart.
+
+        ``chunks[i]`` identifies where to write (via ``get_chunk_pair_span``)
+        and ``new_chunks[i].text_pairs`` is what gets written.
+        ``self.xy_pairs`` is modified in place.
+
+        Raises:
+            AssertionError: If two chunks cover overlapping pair ranges.
+        """
+        occupied_pair_span = [False] * len(self.xy_pairs)
+        pair_span_list = [self.get_chunk_pair_span(e) for e in chunks]
+        for pair_span in pair_span_list:
+            assert not any(occupied_pair_span[pair_span[0]:pair_span[1]]), "Chunk overlap"
+            occupied_pair_span[pair_span[0]:pair_span[1]] = [True] * (pair_span[1] - pair_span[0])
+        # TODO: could verify here that occupied_pair_span is fully covered
+        new_pairs_list = [e.text_pairs for e in new_chunks]
+
+        # Apply from the last span to the first: a replacement may change the
+        # list length, which would shift the indices of any span after it.
+        sorted_spans_with_new_pairs = sorted(
+            zip(pair_span_list, new_pairs_list),
+            key=lambda x: x[0][0],
+            reverse=True
+        )
+
+        for (start, end), new_pairs in sorted_spans_with_new_pairs:
+            self.xy_pairs[start:end] = new_pairs
+
+    def get_chunks(self, pair_span=None, chunk_length_ratio=1, context_length_ratio=1, offset_ratio=0):
+        """Split a pair range into consecutive chunks with surrounding context.
+
+        Args:
+            pair_span: ``(start, end)`` pair indices; defaults to all pairs.
+            chunk_length_ratio: Multiplier applied to ``x_chunk_length`` /
+                ``y_chunk_length`` to get the chunk size.
+            context_length_ratio: Multiplier applied to half of
+                ``x_chunk_length`` / ``y_chunk_length`` to get the context size.
+            offset_ratio: Fraction in ``[0, 1)`` of the chunk size used as the
+                size of the first chunk only; ``0`` disables it.
+
+        Returns:
+            A list of ``Chunk`` objects whose edited ranges follow one another
+            from ``pair_span[0]`` up to ``pair_span[1]``.
+
+        Raises:
+            ValueError: If ``offset_ratio`` is outside ``[0, 1)``, or a step has
+                zero length in both x and y.
+        """
+        # Values outside [0, 1) used to fall through and fail with a TypeError
+        # when the number was indexed like a tuple; reject them explicitly.
+        if not 0 <= offset_ratio < 1:
+            raise ValueError("offset_ratio must satisfy 0 <= offset_ratio < 1")
+
+        pair_span = pair_span or (0, len(self.xy_pairs))
+        chunk_length = self.x_chunk_length * chunk_length_ratio, self.y_chunk_length * chunk_length_ratio
+        context_length = self.x_chunk_length//2 * context_length_ratio, self.y_chunk_length//2 * context_length_ratio
+
+        # (x, y) size of the first chunk when an offset is requested, else None.
+        first_chunk_length = None
+        if offset_ratio > 0:
+            first_chunk_length = int(chunk_length[0] * offset_ratio), int(chunk_length[1] * offset_ratio)
+
+        # Generate chunks
+        chunks = []
+        start = pair_span[0]
+        cstart = self.count_span_length((0, start))  # char_start
+        max_cend = self.count_span_length((0, pair_span[1]))  # char_end
+        while start < pair_span[1]:
+            if first_chunk_length is not None:
+                cend = cstart[0] + first_chunk_length[0], cstart[1] + first_chunk_length[1]
+                first_chunk_length = None
+            else:
+                # 80/20 rule: a shortcut, not aiming for an optimal split
+                cend = cstart[0] + int(chunk_length[0] * 0.8), cstart[1] + int(chunk_length[1] * 0.8)
+            cend = min(cend[0], max_cend[0]), min(cend[1], max_cend[1])
+
+            # Use whichever span has non-zero length to fetch the chunk
+            x_len, y_len = cend[0] - cstart[0], cend[1] - cstart[1]
+            if x_len > 0:
+                chunk1 = self.get_chunk(x_span=(cstart[0], cend[0]), context_length=context_length[0])
+            if y_len > 0:
+                chunk2 = self.get_chunk(y_span=(cstart[1], cend[1]), context_length=context_length[1])
+
+            if x_len > 0 and y_len == 0:
+                chunk = chunk1
+            elif x_len == 0 and y_len > 0:
+                chunk = chunk2
+            elif x_len > 0 and y_len > 0:
+                # Pick the chunk whose source_slice is smaller
+                chunk = chunk1 if chunk1.source_slice.stop - chunk1.source_slice.start < chunk2.source_slice.stop - chunk2.source_slice.start else chunk2
+            else:
+                raise ValueError("Both x_span and y_span have zero length")
+
+            # assert chunk.x_chunk_context_len <= self.x_chunk_length * 2 and chunk.y_chunk_context_len <= self.y_chunk_length * 2, \
+            #     "无法获取到一个足够短的区块，请调整区块长度或窗口长度！"
+
+            chunks.append(chunk)
+            # Continue right after the pairs this chunk edits.
+            start = chunk.text_source_slice.stop
+            cstart = self.count_span_length((0, start))
+
+        return chunks
+
+    # TODO: batch_yield could take generators as input instead of functions plus arguments
+    def batch_yield(self, generators, chunks, prompt_name=None):
+        """Generator: advance several generators round-robin and relay their progress.
+
+        In every round each unfinished generator is advanced by one step, up
+        to ``self.max_thread_num`` generators per round. After each round that
+        leaves work outstanding, the list of latest ``(value, chunk)`` entries
+        is yielded; the same list object is reused and updated in place.
+
+        Args:
+            generators: Generators to drive.
+            chunks: ``chunks[i]`` is paired with whatever ``generators[i]`` yields.
+            prompt_name: If given, a ``KeyPointMsg`` with this name is yielded
+                once before the first progress list and again, marked
+                finished, at the end.
+
+        Returns:
+            A list holding each generator's return value, in input order.
+        """
+        # TODO: later consider emitting only new_chunks; chunks need not be re-emitted
+
+        # Process all pairs with the prompt and yield intermediate results
+        results = [None] * len(generators)
+        yields = [None] * len(generators)
+        finished = [False] * len(generators)
+        first_iter_flag = True
+        while True:
+            co_num = 0
+            for i, gen in enumerate(generators):
+                if finished[i]:
+                    continue
+
+                try:
+                    co_num += 1
+                    yield_value = next(gen)
+                    yields[i] = (yield_value, chunks[i])    # TODO: the chunk is attached to the yield for the frontend's sake
+                except StopIteration as e:
+                    # The generator's return value travels on StopIteration.
+                    results[i] = e.value
+                    finished[i] = True
+                    if yields[i] is None: yields[i] = (None, chunks[i])
+                
+                # Generators finishing in this round still count toward the cap.
+                if co_num >= self.max_thread_num:
+                        break
+            
+            if all(finished):
+                break
+
+            if first_iter_flag and prompt_name is not None:
+                yield (kp_msg := KeyPointMsg(prompt_name=prompt_name))
+                first_iter_flag = False
+
+            yield yields  # an entry that came from a yield is always a tuple
+
+        # Only close the key point if it was actually opened above.
+        if not first_iter_flag and prompt_name is not None:
+            yield kp_msg.set_finished()
+
+        return results
+
+    # Temporary helper for the frontend: returns a change that, when applied to self, turns it into cur
+    def diff_to(self, cur, pair_span=None):
+        """Describe how the y text of ``self`` must change to become that of ``cur``.
+
+        Args:
+            cur: Another writer-like object exposing ``xy_pairs``.
+            pair_span: ``(start, end)`` pair indices in ``self`` to report on;
+                defaults to all pairs.
+
+        Returns:
+            A list of ``(x_text, old_y, new_y)`` tuples.
+
+        Raises:
+            AssertionError: If the two pair lists cannot be fully aligned by x length.
+        """
+        if pair_span is None:
+            pair_span = (0, len(self.xy_pairs))
+        
+        if self.count_span_length(pair_span)[0] == 0:
+            # In version 2.1, chapter and plot creation does not refer to x
+            # The span in cur keeps the same number of pairs before and after it as in self.
+            pair_span2 = (0 + pair_span[0], len(cur.xy_pairs) - (len(self.xy_pairs) - pair_span[1]))
+            y_list = [e[1] for e in self.xy_pairs[pair_span[0]:pair_span[1]]] 
+            y2_list =[e[1] for e in cur.xy_pairs[pair_span2[0]:pair_span2[1]]]
+            
+            # Pad the shorter list with empty strings so both pair up one-to-one.
+            y_list += ['',] * max(len(y2_list) - len(y_list), 0)
+            y2_list += ['',] * max(len(y_list) - len(y2_list), 0)
+
+            data_chunks = [('', y, y2) for y, y2 in zip(y_list, y2_list)]
+
+            return data_chunks
+
+        # Each pointer is a (start, end) window of pair indices.
+        pre_pointer = 0, 1
+        cur_pointer = 0, 1
+
+        cum_sum_pre = np.cumsum([0] + [len(pair[0]) for pair in self.xy_pairs])
+        cum_sum_cur = np.cumsum([0] + [len(pair[0]) for pair in cur.xy_pairs])
+
+        apply_chunks = []
+
+        # Grow whichever window covers less x text until both cover the same
+        # amount, record that group, then start new windows right after it.
+        while pre_pointer[1] <= len(self.xy_pairs) and cur_pointer[1] <= len(cur.xy_pairs):
+            if cum_sum_pre[pre_pointer[1]] - cum_sum_pre[pre_pointer[0]] == cum_sum_cur[cur_pointer[1]] - cum_sum_cur[cur_pointer[0]]:
+                chunk = self.get_chunk(pair_span=pre_pointer)
+                value = "".join(pair[1] for pair in cur.xy_pairs[cur_pointer[0]:cur_pointer[1]])
+                apply_chunks.append((chunk, 'y_chunk', value))
+
+                pre_pointer = pre_pointer[1], pre_pointer[1] + 1
+                cur_pointer = cur_pointer[1], cur_pointer[1] + 1
+            elif cum_sum_pre[pre_pointer[1]] - cum_sum_pre[pre_pointer[0]] < cum_sum_cur[cur_pointer[1]] - cum_sum_cur[cur_pointer[0]]:
+                pre_pointer = pre_pointer[0], pre_pointer[1] + 1
+            else:
+                cur_pointer = cur_pointer[0], cur_pointer[1] + 1
+        
+        # Both lists must have been consumed completely and simultaneously.
+        assert pre_pointer[1] == len(self.xy_pairs) + 1 and cur_pointer[1] == len(cur.xy_pairs) + 1
+
+        # Keep only the groups that lie entirely inside the requested pair_span.
+        filtered_apply_chunks = []
+        for e in apply_chunks:
+            text_source_slice = e[0].text_source_slice
+            if text_source_slice.start >= pair_span[0] and text_source_slice.stop <= pair_span[1]:
+                filtered_apply_chunks.append(e)
+
+        data_chunks = []
+        for chunk, key, value in filtered_apply_chunks:
+            data_chunks.append((chunk.x_chunk, chunk.y_chunk, value))
+
+        return data_chunks
+
+    # Temporary helper for the frontend
+    def apply_chunk(self, chunk:Chunk, key, value):
+        """Edit one field of a chunk and write the result back into ``xy_pairs``.
+
+        ``chunk`` may be a ``Chunk`` or a plain mapping with the ``Chunk``
+        constructor's keys; ``key`` is the keyword passed to ``Chunk.edit``
+        and ``value`` its new content.
+        """
+        target = chunk if isinstance(chunk, Chunk) else Chunk(**chunk)
+        edited = target.edit(**{key: value})
+        self.apply_chunks([target], [edited])
+    
+    def write_text(self, chunk:Chunk, prompt_main, user_prompt_text, input_keys=None, model=None):
+        """Generator: run a writing prompt on a chunk and return the edited chunk.
+
+        Args:
+            chunk: The chunk supplying the prompt inputs.
+            prompt_main: Prompt generator function; called with ``model``,
+                ``user_prompt`` and the collected keyword inputs.
+            user_prompt_text: Text passed as ``user_prompt``.
+            input_keys: Chunk attribute names to pass; all must be non-empty.
+                Defaults to the four x/y text and context attributes.
+            model: Model override; defaults to ``self.get_model()``.
+
+        Yields:
+            Whatever ``prompt_main`` yields.
+
+        Returns:
+            ``chunk.edit(...)`` with the prompt's ``text`` stored under its
+            ``text_key`` if present, otherwise under ``y_chunk``.
+        """
+        chunk2prompt_key = {
+            'x_chunk': 'x',
+            'y_chunk': 'y',
+            'x_chunk_context': 'context_x',
+            'y_chunk_context': 'context_y'
+        }
+
+        if input_keys is None:
+            chunk_values = {name: getattr(chunk, name) for name in chunk2prompt_key.keys()}
+        else:
+            chunk_values = {name: getattr(chunk, name) for name in input_keys}
+            assert all(chunk_values.values()), "Missing required context keys"
+
+        # Rename chunk attribute names to the prompt's argument names.
+        prompt_kwargs = {}
+        for name, text in chunk_values.items():
+            prompt_kwargs[chunk2prompt_key.get(name, name)] = text
+
+        # prompt_kwargs carries all available information; the prompt decides what to use
+        prompt_kwargs.update(self.global_context)
+
+        result = yield from prompt_main(
+            model=model or self.get_model(),
+            user_prompt=user_prompt_text,
+            **prompt_kwargs
+        )
+
+        # Kept for compatibility with summary_prompt in V2.2; the text_key design will be dropped later
+        target_key = result['text_key'] if 'text_key' in result else 'y_chunk'
+
+        return chunk.edit(**{target_key: result['text']})
+    
+    # The scoring mechanism of review is not implemented yet
+    def review_text(self, chunk:Chunk, prompt_name, model=None):
+        """Generator: run the review prompt on the chunk's y text and return the review text.
+
+        ``model`` overrides ``self.get_model()`` when given. Yields whatever
+        the review prompt yields.
+        """
+        active_model = model or self.get_model()
+        review = yield from prompt_review(model=active_model, prompt_name=prompt_name, y=chunk.y_chunk)
+        return review['text']
+
+    def map_text_wo_llm(self, chunk:Chunk):
+        """Re-split a chunk's pairs to suitable lengths without calling an LLM.
+
+        A pair with text on only one side is split on that side with
+        ``split_text_into_chunks``; the other side is repeated on every piece.
+        Any other pair is kept as is.
+
+        Raises:
+            ValueError: If a pair that is not one-sided exceeds
+                ``x_chunk_length`` or ``y_chunk_length``.
+        """
+        resized_pairs = []
+        for x, y in chunk.text_pairs:
+            has_x = bool(x.strip())
+            has_y = bool(y.strip())
+
+            if has_x and not has_y:
+                pieces = split_text_into_chunks(x, self.x_chunk_length, min_chunk_n=1, min_chunk_size=5)
+                for piece in pieces:
+                    resized_pairs.append((piece, y))
+                continue
+
+            if has_y and not has_x:
+                pieces = split_text_into_chunks(y, self.y_chunk_length, min_chunk_n=1, min_chunk_size=5)
+                for piece in pieces:
+                    resized_pairs.append((x, piece))
+                continue
+
+            # Both sides filled (or both blank): cannot be split without a mapping.
+            if len(x) > self.x_chunk_length or len(y) > self.y_chunk_length:
+                raise ValueError("窗口太小或段落太长!考虑选择更大的窗口长度或手动分段。")
+            resized_pairs.append((x, y))
+
+        return chunk.edit(text_pairs=resized_pairs)
+
+    def map_text(self, chunk:Chunk):
+        """Generator: re-split a chunk's x and y text and align the pieces into pairs.
+
+        * If x re-splits into exactly as many pieces as the chunk already has
+          pairs, the chunk is returned unchanged.
+        * If x is blank, only y is split and every piece is paired with ``''``.
+        * Otherwise x and y are both split and the alignment prompt (run on
+          the sub-model) decides which y pieces belong to which x pieces.
+
+        Yields:
+            Whatever the alignment prompt yields (nothing in the first two cases).
+
+        Returns:
+            A tuple ``(chunk, True, '')`` holding the resulting chunk.
+        """
+        # TODO: map should check that the mapped content roughly matches and that nothing is wrongly mapped into the context
+
+        if chunk.x_chunk.strip():
+            x_pairs = split_text_into_chunks(chunk.x_chunk, self.x_chunk_length, min_chunk_n=1, min_chunk_size=5, max_chunk_n=20)
+            assert len(x_pairs) >= len(chunk.text_pairs), "未知错误！合并所有区块后再分区块，结果更少？"
+            if len(x_pairs) == len(chunk.text_pairs):
+                return chunk, True, ''
+        else:
+            # y was created from global_context rather than from x
+            y_pairs = split_text_into_chunks(chunk.y_chunk, self.y_chunk_length, min_chunk_n=1, min_chunk_size=5, max_chunk_n=20)
+            new_xy_pairs = [('', y) for y in y_pairs]
+            return chunk.edit(text_pairs=new_xy_pairs), True, ''
+
+        try:
+            y_pairs = split_text_into_chunks(chunk.y_chunk, self.y_chunk_length, min_chunk_n=len(x_pairs), min_chunk_size=5, max_chunk_n=20)
+        except Exception as e:
+            # If y_chunk cannot be split into that many pieces, split x_chunk into fewer pieces instead
+            y_pairs = split_text_into_chunks(chunk.y_chunk, self.y_chunk_length, min_chunk_n=1, min_chunk_size=5, max_chunk_n=20)
+            x_pairs = split_text_into_chunks(chunk.x_chunk, self.x_chunk_length, min_chunk_n=1, min_chunk_size=5, max_chunk_n=int(0.8 * len(y_pairs)))
+            
+            # TODO: needed because the current mapping prompt requires fewer x pieces than y pieces; the prompt will be improved later
+
+        # Relay the prompt's progress; its return value arrives on StopIteration.
+        try:
+            gen = match_plot_and_text.main(
+                model=self.get_sub_model(),
+                plot_chunks=x_pairs,
+                text_chunks=y_pairs
+                )
+            while True:
+                yield next(gen)
+        except StopIteration as e:
+            output = e.value
+        
+        x2y = output['plot2text']
+        new_xy_pairs = []
+        for xi_list, yi_list in x2y:
+            # x pieces from the first to the last listed index are merged;
+            # y pieces are joined in the listed order.
+            xl, xr = xi_list[0], xi_list[-1]
+            new_xy_pairs.append(("".join(x_pairs[xl:xr+1]), "".join(y_pairs[i] for i in yi_list)))
+
+        new_chunk = chunk.edit(text_pairs=new_xy_pairs)
+        return new_chunk, True, ''
+    
+    def batch_map_text(self, chunks):
+        """Generator: run ``map_text`` on every chunk via ``batch_yield``.
+
+        Returns the list of ``map_text`` return values, in input order.
+        """
+        mappers = [self.map_text(chunk) for chunk in chunks]
+        return (yield from self.batch_yield(mappers, chunks, prompt_name='映射文本'))
+    
+    def batch_write_apply_text(self, chunks, prompt_main, user_prompt_text):
+        """Generator: write every chunk with a prompt, re-map the results and apply them.
+
+        Runs ``write_text`` on each chunk, passes the rewritten chunks through
+        ``batch_map_text``, then writes the mapped chunks into ``xy_pairs``
+        with ``apply_chunks``.
+        """
+        writers = [self.write_text(chunk, prompt_main, user_prompt_text) for chunk in chunks]
+        written_chunks = yield from self.batch_yield(writers, chunks, prompt_name='创作文本')
+
+        map_results = yield from self.batch_map_text(written_chunks)
+        # Each map result is a tuple whose first element is the mapped chunk.
+        mapped_chunks = [result[0] for result in map_results]
+
+        self.apply_chunks(chunks, mapped_chunks)
+
+    def batch_review_write_apply_text(self, chunks, write_prompt_main, review_prompt_name):
+        """Generator: review every chunk, rewrite it from the review, re-map and apply.
+
+        Each chunk is reviewed with ``review_text``. The review text plus a
+        fixed rewrite instruction becomes the user prompt for ``write_text``.
+        The rewritten chunks are mapped with ``batch_map_text`` and written
+        into ``xy_pairs`` with ``apply_chunks``.
+        """
+        reviewers = [self.review_text(chunk, review_prompt_name) for chunk in chunks]
+        reviews = yield from self.batch_yield(reviewers, chunks, prompt_name='审阅文本')
+
+        rewrite_instruction = "\n\n根据审阅意见，重新创作，如果审阅意见表示无需改动，则保持原样输出。"
+
+        writers = [
+            self.write_text(chunk, write_prompt_main, review + rewrite_instruction)
+            for chunk, review in zip(chunks, reviews)
+        ]
+        written_chunks = yield from self.batch_yield(writers, chunks, prompt_name='创作文本')
+
+        map_results = yield from self.batch_map_text(written_chunks)
+        # Each map result is a tuple whose first element is the mapped chunk.
+        mapped_chunks = [result[0] for result in map_results]
+
+        self.apply_chunks(chunks, mapped_chunks)
diff --git a/core/writer_utils.py b/core/writer_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac3aace4fd737684cf97a7451d8c1602560060ef
--- /dev/null
+++ b/core/writer_utils.py
@@ -0,0 +1,216 @@
+import uuid
+
+# 定义了用于Writer yield的数据类型，同时也是前端展示的“关键点”消息
+class KeyPointMsg(dict):
+    """Dict-based "key point" message that the writer yields for the frontend.
+
+    Pass either ``title`` and ``subtitle`` together, or ``prompt_name`` alone;
+    anything else raises ``ValueError``. Starts unfinished with a UUID4 ``id``.
+    """
+
+    def __init__(self, title='', subtitle='', prompt_name=''):
+        super().__init__()
+        prompt_form = bool(prompt_name) and not title and not subtitle
+        title_form = bool(title) and bool(subtitle) and not prompt_name
+        if not (prompt_form or title_form):
+            raise ValueError('Either title and subtitle or prompt_name must be provided')
+
+        self.update({'id': str(uuid.uuid4()), 'title': title, 'subtitle': subtitle,
+                     'prompt_name': prompt_name, 'finished': False})
+
+    def set_finished(self):
+        """Mark the message finished (asserts it was not already) and return self for chaining."""
+        assert not self['finished'], 'finished flag is already set'
+        self['finished'] = True
+        return self
+
+    def is_finished(self):
+        return self['finished']
+
+    def is_prompt(self):
+        return bool(self['prompt_name'])
+
+    def is_title(self):
+        return bool(self['title'])
+
+    @property
+    def id(self):
+        return self['id']
+
+    @property
+    def title(self):
+        return self['title']
+
+    @property
+    def subtitle(self):
+        return self['subtitle']
+
+    @property
+    def prompt_name(self):
+        """Prompt name for display: the first 10 characters plus '...' when it is longer."""
+        full_name = self['prompt_name']
+        # A name of exactly 10 characters fits as-is, so only longer names get the ellipsis.
+        return full_name[:10] + '...' if len(full_name) > 10 else full_name
+
+
+import re
+from difflib import Differ
+
+# 后续考虑采用现成的库实现，目前逻辑过于繁琐，而且太慢了
+def detect_max_edit_span(a, b):
+    """Locate the edited span between ``a`` and ``b`` from a ``difflib.Differ`` diff.
+
+    Returns ``(l, -r)``: ``l`` counts the unchanged items before the first
+    change and ``r`` those after the last change (returned negated).
+    Identical inputs give ``(number of items, 0)``.
+    """
+    prefix_len = suffix_len = 0
+    seen_change = False
+    for line in Differ().compare(a, b):
+        if not line.startswith(' '):
+            # A change: whatever was counted since the previous change is not the suffix.
+            seen_change, suffix_len = True, 0
+        elif seen_change:
+            suffix_len += 1
+        else:
+            prefix_len += 1
+    return prefix_len, -suffix_len
+
+def split_text_by_separators(text, separators, keep_separators=True):
+    """Split ``text`` into pieces, each ending at a run of separators.
+
+    Consecutive separators (of any mix) form one boundary that stays with the
+    piece before it. Whitespace-only content between boundaries is merged into
+    the next piece; whitespace-only content after the last boundary is dropped.
+    Args:
+        text: The text to split.
+        separators: Literal separator strings (regex-escaped); an empty list means no split.
+        keep_separators: Whether pieces keep their separators. Defaults to True.
+    Returns:
+        The non-blank pieces in their original order.
+    """
+    if not separators:
+        return [text] if text.strip() else []
+    # Group the alternation before ``+``: in a bare ``a|b+`` only ``b`` would repeat.
+    pattern = f'((?:{"|".join(map(re.escape, separators))})+)'
+    chunks = re.split(pattern, text)
+
+    paragraphs, pending = [], []
+    # With one capture group, re.split alternates: content, separator run, content, ...
+    for content, separator in zip(chunks[::2], chunks[1::2] + ['']):
+        pending.append(content)
+        if keep_separators and separator:
+            pending.append(separator)
+        if content.strip():
+            paragraphs.append(''.join(pending))
+            pending = []
+    return paragraphs
+
+def split_text_into_paragraphs(text, keep_separators=True):  # paragraphs are delimited by newlines
+    return split_text_by_separators(text, separators=['\n'], keep_separators=keep_separators)
+
+def split_text_into_sentences(text, keep_separators=True):  # sentences end at a newline or one of 。？！；
+    return split_text_by_separators(text, separators=['\n', '。', '？', '！', '；'], keep_separators=keep_separators)
+
+def run_and_echo_yield_func(func, *args, **kwargs):
+    """Run generator ``func``, echo its yields to stdout, and return them as a list.
+
+    Only text added since the previous yield is printed; if the new text does not
+    extend the previous one, a divider is printed and it is echoed in full.
+    """
+    printed, collected = "", []
+    for messages in func(*args, **kwargs):
+        collected.append(messages)
+        rendered = "\n".join(f"{msg['role']}:\n{msg['content']}" for msg in messages)
+        if not rendered.startswith(printed):
+            print('\n--------------------------------')
+            printed = ""
+        print(rendered[len(printed):], end="")
+        printed = rendered
+    return collected
+
+def run_yield_func(func, *args, **kwargs):
+    generator = func(*args, **kwargs)  # drained below; only its return value is of interest
+    while True:
+        try:
+            next(generator)
+        except StopIteration as stop:
+            return stop.value  # the value the generator returned
+
+def split_text_into_chunks(text, max_chunk_size, min_chunk_n, min_chunk_size=1, max_chunk_n=1000):
+    """Split ``text`` into chunks that respect the size and count limits.
+
+    Paragraphs from ``split_text_into_paragraphs`` are merged (smallest adjacent
+    pair first) until at most ``max_chunk_n`` remain and none is shorter than
+    ``min_chunk_size``. Then the longest chunk is repeatedly cut after the '。',
+    '？' or '；' nearest its middle until there are at least ``min_chunk_n``
+    chunks and none is longer than ``max_chunk_size``.
+
+    Raises AssertionError for ``max_chunk_n < 1``, text shorter than
+    ``min_chunk_size``, or 1000 loop rounds; Exception when a needed cut is
+    impossible (no cut point, a part too short, or too many chunks).
+    """
+    def split_paragraph(para):
+        # Cut right after the sentence-ending mark closest to the middle.
+        middle = len(para) // 2
+        cut_candidates = [match.end() for match in re.finditer(r'[。？；]', para)]
+        if not cut_candidates:
+            raise Exception("没有找到分割点!")
+        cut = min(cut_candidates, key=lambda pos: abs(pos - middle))
+        head, tail = para[:cut], para[cut:]
+        if not (head.strip() and tail.strip()):
+            raise Exception("没有找到分割点!")
+        return head, tail
+
+    paragraphs = split_text_into_paragraphs(text)
+    assert max_chunk_n >= 1, "max_chunk_n必须大于等于1"
+    assert sum(map(len, paragraphs)) >= min_chunk_size, f"分割时，输入的文本长度小于要求的min_chunk_size:{min_chunk_size}"
+
+    rounds = 0  # shared by both loops; guards against looping forever
+    while len(paragraphs) > max_chunk_n or min(map(len, paragraphs)) < min_chunk_size:
+        assert (rounds := rounds + 1) < 1000, "分割进入死循环！"
+        # Merge the first adjacent pair with the smallest combined length.
+        i = min(range(len(paragraphs) - 1), default=0,
+                key=lambda k: len(paragraphs[k]) + len(paragraphs[k + 1]))
+        paragraphs[i:i + 2] = [''.join(paragraphs[i:i + 2])]
+
+    while len(paragraphs) < min_chunk_n or max(map(len, paragraphs)) > max_chunk_size:
+        assert (rounds := rounds + 1) < 1000, "分割进入死循环！"
+        longest = max(range(len(paragraphs)), key=lambda k: len(paragraphs[k]))
+        part1, part2 = split_paragraph(paragraphs[longest])
+        if min(len(part1), len(part2)) < min_chunk_size or len(paragraphs) + 1 > max_chunk_n:
+            raise Exception("没有找到合适的分割点!")
+        paragraphs[longest:longest + 1] = [part1, part2]
+    return paragraphs
+
+def test_split_text_into_chunks():
+    """Smoke-test split_text_into_chunks on three texts, printing each result."""
+    # Test case 1: Simple paragraph splitting
+    simple_text = "这是第一段。这是第二段。这是第三段。"
+    simple_chunks = split_text_into_chunks(simple_text, max_chunk_size=10, min_chunk_n=3)
+    print("Test 1 result:", simple_chunks)
+    assert len(simple_chunks) == 3, f"Expected 3 chunks, got {len(simple_chunks)}"
+
+    # Test case 2: Long paragraph splitting
+    long_text = "这是一个很长的段落，包含了很多句子。它应该被分割成多个小块。这里有一些标点符号，比如句号。还有问号？以及分号；这些都可以用来分割文本。"
+    long_chunks = split_text_into_chunks(long_text, max_chunk_size=20, min_chunk_n=4)
+    print("Test 2 result:", long_chunks)
+    assert len(long_chunks) >= 4, f"Expected at least 4 chunks, got {len(long_chunks)}"
+    assert all(len(chunk) <= 20 for chunk in long_chunks), "Some chunks are longer than max_chunk_size"
+
+    # Test case 3: Text with newlines
+    multiline_text = "第一段。\n\n第二段。\n第三段。\n\n第四段很长，需要被分割。这是第四段的继续。"
+    multiline_chunks = split_text_into_chunks(multiline_text, max_chunk_size=15, min_chunk_n=5)
+    print("Test 3 result:", multiline_chunks)
+    assert len(multiline_chunks) >= 5, f"Expected at least 5 chunks, got {len(multiline_chunks)}"
+    assert all(len(chunk) <= 15 for chunk in multiline_chunks), "Some chunks are longer than max_chunk_size"
+
+    print("All tests passed!")
+
+if __name__ == "__main__":
+    # Manual demo: print the edit span for a few revisions of one sentence, then self-test.
+    original_sentence = "我吃西红柿"
+    for revised in ("我不喜欢吃西红柿", "不喜欢吃西红柿", "我不喜欢吃", "你不喜欢吃西瓜"):
+        print(detect_max_edit_span(original_sentence, revised))
+
+    test_split_text_into_chunks()
diff --git "a/custom/\346\240\271\346\215\256\346\217\220\347\272\262\345\210\233\344\275\234\346\255\243\346\226\207/\345\244\251\350\232\225\345\234\237\350\261\206\351\243\216\346\240\274.txt" "b/custom/\346\240\271\346\215\256\346\217\220\347\272\262\345\210\233\344\275\234\346\255\243\346\226\207/\345\244\251\350\232\225\345\234\237\350\261\206\351\243\216\346\240\274.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..f904db147d43d501526137fa72aed74941c75885
--- /dev/null
+++ "b/custom/\346\240\271\346\215\256\346\217\220\347\272\262\345\210\233\344\275\234\346\255\243\346\226\207/\345\244\251\350\232\225\345\234\237\350\261\206\351\243\216\346\240\274.txt"
@@ -0,0 +1,14 @@
+你是一个网文大神作家，外号天蚕豌豆，擅长写玄幻网文，代表作有《斗破天空》，《舞动乾坤》，《我主宰》。
+
+你的常用反派话语有：
+此子断不可留，否则日后必成大患！
+做事留一线，日后好相见。
+一口鲜血夹杂着破碎的内脏喷出。
+能把我逼到这种地步，你足以自傲了。
+放眼XXX，你也算是凤毛麟角般的存在。
+
+你的常用词语有：
+黯然销魂、神出鬼没、格格不入、微不足道、窃窃私语、给我破、给我碎、摧枯拉朽、倒吸一口凉气、一脚踢开、旋即、苦笑、美眸、一拳、放眼、桀桀、负手而立、摧枯拉朽、黑袍老者、摸了摸鼻子、妮子、贝齿紧咬着红唇、幽怨、浊气、凤毛麟角、一声娇喝、恐怖如斯、纤纤玉手、头角峥嵘、桀桀桀、虎躯一震、苦笑一声、三千青丝
+
+
+下面我会给你一段网文提纲，需要你对其进行润色或重写，输出网文正文。
diff --git "a/custom/\346\240\271\346\215\256\346\217\220\347\272\262\345\210\233\344\275\234\346\255\243\346\226\207/\345\257\271\350\215\211\347\250\277\350\277\233\350\241\214\346\266\246\350\211\262.txt" "b/custom/\346\240\271\346\215\256\346\217\220\347\272\262\345\210\233\344\275\234\346\255\243\346\226\207/\345\257\271\350\215\211\347\250\277\350\277\233\350\241\214\346\266\246\350\211\262.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..98646e21601446d7bc31d0296956eeb701f36bd6
--- /dev/null
+++ "b/custom/\346\240\271\346\215\256\346\217\220\347\272\262\345\210\233\344\275\234\346\255\243\346\226\207/\345\257\271\350\215\211\347\250\277\350\277\233\350\241\214\346\266\246\350\211\262.txt"
@@ -0,0 +1,7 @@
+你是一个网文作家，下面我会给你一段简陋粗略的网文草稿，需要你对其进行润色或重写，输出网文正文。
+
+在创作的过程中，你需要注意以下事项：
+1. 在草稿的基础上进行创作，不要过度延伸，不要在结尾进行总结。
+2. 对于草稿中内容，在正文中需要用小说家的口吻去描写，包括语言、行为、人物、环境描写等。
+3. 对于草稿中缺失的部分，在正文中需要进行补全。
+
diff --git a/healthcheck.py b/healthcheck.py
new file mode 100644
index 0000000000000000000000000000000000000000..156e2109228ce9613c4d6e4c18556b22ee93550a
--- /dev/null
+++ b/healthcheck.py
@@ -0,0 +1,24 @@
+import http.client
+import sys
+import os
+
+BACKEND_PORT = int(os.environ.get('BACKEND_PORT', 7869))  # port to probe: BACKEND_PORT env var, else 7869
+
+
+def check_health(timeout=10):
+    """Return True iff GET /health on localhost:BACKEND_PORT answers 200 within ``timeout`` seconds."""
+    conn = http.client.HTTPConnection("localhost", BACKEND_PORT, timeout=timeout)
+    try:
+        conn.request("GET", "/health")
+        status = conn.getresponse().status
+    except Exception as e:  # connection refused, timeout, bad response: all mean unhealthy
+        print(f"Health check failed: {e}", file=sys.stderr)
+        return False
+    finally:
+        conn.close()  # always release the socket
+    ok = status == 200
+    print("Health check passed" if ok else f"Health check failed: {status}")
+    return ok
+
+if __name__ == "__main__":
+    raise SystemExit(0 if check_health() else 1)  # exit status 0 = healthy, 1 = unhealthy
diff --git a/llm_api/__init__.py b/llm_api/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..95a006c2e466170bcc079cdad34fead8e9e48c88
--- /dev/null
+++ b/llm_api/__init__.py
@@ -0,0 +1,109 @@
+from typing import Dict, Any, Optional, Generator
+
+from .mongodb_cache import llm_api_cache
+from .baidu_api import stream_chat_with_wenxin, wenxin_model_config
+from .doubao_api import stream_chat_with_doubao, doubao_model_config
+from .chat_messages import ChatMessages
+from .openai_api import stream_chat_with_gpt, gpt_model_config
+from .zhipuai_api import stream_chat_with_zhipuai, zhipuai_model_config
+
+class ModelConfig(dict):
+    """Dict holding a model name plus its API options; validated on construction."""
+    def __init__(self, model: str, **options):
+        super().__init__(**options)
+        self['model'] = model
+        self.validate()
+
+    def validate(self):
+        """Raise ValueError for a missing/blank provider credential or missing ``max_tokens``; assert ``max_tokens`` <= 4096."""
+        def check_key(provider, keys):
+            for key in keys:
+                if key not in self:
+                    raise ValueError(f"{provider}的API设置中未传入: {key}")
+                if not str(self[key] or '').strip():  # None / blank / non-str values count as unconfigured
+                    raise ValueError(f"{provider}的API设置中未配置: {key}")
+
+        if self['model'] in wenxin_model_config:
+            check_key('文心一言', ['ak', 'sk'])
+        elif self['model'] in doubao_model_config:
+            check_key('豆包', ['api_key', 'endpoint_id'])
+        elif self['model'] in zhipuai_model_config:
+            check_key('智谱AI', ['api_key'])
+        else:  # every other model name is called through the OpenAI-compatible API
+            check_key('OpenAI', ['api_key'])
+
+        if 'max_tokens' not in self:
+            raise ValueError('ModelConfig未传入key: max_tokens')
+        assert self['max_tokens'] <= 4_096, 'max_tokens最大为4096！'
+
+    def get_api_keys(self) -> Dict[str, str]:
+        """Return every entry except ``'model'``."""
+        return {k: v for k, v in self.items() if k != 'model'}
+
+@llm_api_cache()
+def stream_chat(model_config: ModelConfig, messages: list, response_json=False) -> Generator:
+    """Stream a chat completion from the provider that serves ``model_config['model']``.
+
+    Args:
+        model_config: A ``ModelConfig``, or a plain dict with the same keys
+            (``model``, ``max_tokens`` and the provider's credentials).
+        messages: The prompt messages; they are wrapped in ``ChatMessages``.
+        response_json: Forwarded unchanged to the provider call.
+
+    Yields:
+        The prompt ``ChatMessages`` first, then everything the provider generator
+        yields, and finally its result with ``finished`` set to True.
+
+    Returns:
+        The provider generator's return value (the object yielded last).
+
+    Raises:
+        ValueError: ``model_config`` fails ``ModelConfig.validate``.
+        AssertionError: ``max_tokens`` is above 4096.
+        Exception: The estimated prompt tokens exceed ``max_tokens``.
+    """
+    if isinstance(model_config, dict):
+        model_config = ModelConfig(**model_config)
+    model_config.validate()
+
+    model = model_config['model']
+    messages = ChatMessages(messages, model=model)
+
+    assert model_config['max_tokens'] <= 4096, 'max_tokens最大为4096！'
+    if messages.count_message_tokens() > model_config['max_tokens']:
+        raise Exception(f'请求的文本过长，超过最大tokens:{model_config["max_tokens"]}。')
+
+    yield messages
+
+    # Keyword arguments that every provider call takes.
+    shared = dict(model=model, max_tokens=model_config['max_tokens'], response_json=response_json)
+    if model in wenxin_model_config:
+        provider_stream = stream_chat_with_wenxin(
+            messages, ak=model_config['ak'], sk=model_config['sk'], **shared)
+    elif model in doubao_model_config:
+        provider_stream = stream_chat_with_doubao(
+            messages, endpoint_id=model_config['endpoint_id'], api_key=model_config['api_key'], **shared)
+    elif model in zhipuai_model_config:
+        provider_stream = stream_chat_with_zhipuai(
+            messages, api_key=model_config['api_key'], **shared)
+    else:
+        # OpenAI models, plus any other model served through an OpenAI-compatible API.
+        provider_stream = stream_chat_with_gpt(
+            messages, api_key=model_config['api_key'], base_url=model_config.get('base_url'),
+            proxies=model_config.get('proxies'), **shared)
+
+    result = yield from provider_stream
+    result.finished = True
+    yield result
+
+    return result
+
+def test_stream_chat(model_config: ModelConfig):
+    """Yield each partial reply to a fixed "1+1" prompt (use_cache=False); return the last messages."""
+    prompt = [{"role": "user", "content": "1+1=?直接输出答案即可："}]
+    for response in stream_chat(model_config, prompt, use_cache=False):
+        yield response.response
+    return response
+
+# Export the necessary functions and configuration.
+__all__ = ['ChatMessages', 'stream_chat', 'wenxin_model_config', 'doubao_model_config', 'gpt_model_config', 'zhipuai_model_config', 'ModelConfig']
diff --git a/llm_api/baidu_api.py b/llm_api/baidu_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..effd986f22d3611e1746f14a36fd6e442f5423e3
--- /dev/null
+++ b/llm_api/baidu_api.py
@@ -0,0 +1,48 @@
+import qianfan
+from .chat_messages import ChatMessages
+
+# ak和sk获取：https://console.bce.baidu.com/qianfan/ais/console/applicationConsole/application
+
+# 价格：https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
+
+wenxin_model_config = {  # model name -> pricing; "Pricing" = (input, output) price per 1,000 tokens, see ChatMessages.cost
+    "ERNIE-3.5-8K":{
+        "Pricing": (0.0008, 0.002),
+        "currency_symbol": '￥',
+    },
+    "ERNIE-4.0-8K":{
+        "Pricing": (0.03, 0.09),
+        "currency_symbol": '￥',
+    },
+    "ERNIE-Novel-8K":{
+        "Pricing": (0.04, 0.12),
+        "currency_symbol": '￥',
+    }
+}
+
+
+def stream_chat_with_wenxin(messages, model='ERNIE-Bot', response_json=False, ak=None, sk=None, max_tokens=6000):
+    """Stream a Wenxin (qianfan) chat completion, yielding ``messages`` after every part.
+
+    A leading system message is sent as ``system``; the reply accumulates in an appended
+    assistant message. NOTE(review): ``max_tokens`` is accepted but never forwarded.
+    """
+    if ak is None or sk is None:
+        raise Exception('未提供有效的 ak 和 sk！')
+    client = qianfan.ChatCompletion(ak=ak, sk=sk)
+    has_system = messages[0]['role'] == 'system'
+    chatstream = client.do(
+        model=model, stream=True, response_format='json_object' if response_json else 'text',
+        system=messages[0]['content'] if has_system else None,
+        messages=messages[1:] if has_system else messages)
+    messages.append({'role': 'assistant', 'content': ''})
+    content = ''
+    for part in chatstream:
+        content += part['body']['result'] or ''
+        messages[-1]['content'] = content
+        yield messages
+    return messages
+
+    
+if __name__ == '__main__':
+    pass  # placeholder: running this module as a script does nothing
\ No newline at end of file
diff --git a/llm_api/chat_messages.py b/llm_api/chat_messages.py
new file mode 100644
index 0000000000000000000000000000000000000000..a963c8eb8a9dbfdc128d0001e96e083a436f5759
--- /dev/null
+++ b/llm_api/chat_messages.py
@@ -0,0 +1,116 @@
+import hashlib
+import re
+import json
+import os
+
+def count_characters(text):
+    """Count the characters of ``text`` by class.
+
+    Returns:
+        A ``(chinese, english, other)`` tuple: characters in U+4E00..U+9FFF,
+        ASCII letters, and every remaining character (digits, punctuation,
+        whitespace, ...).
+    """
+    chinese_count = len(re.findall(r'[\u4e00-\u9fff]', text))
+    english_count = len(re.findall(r'[a-zA-Z]', text))
+    # The three classes partition the text, so "other" is whatever was not
+    # counted above.
+    other_count = len(text) - chinese_count - english_count
+    return chinese_count, english_count, other_count
+
+
+model_config = {}  # model name -> pricing entry; filled on the first ChatMessages construction
+
+
+model_prices = {}  # per-model price table from model_prices.json; stays empty if loading fails
+try:
+    model_prices_path = os.path.join(os.path.dirname(__file__), 'model_prices.json')
+    with open(model_prices_path, 'r', encoding='utf-8') as f:  # don't depend on the locale encoding
+        model_prices = json.load(f)
+except Exception as e:  # best effort: on any failure only warn and keep the empty table
+    print(f"Warning: Failed to load model_prices.json: {e}")
+
+class ChatMessages(list):
+    """List of chat message dicts (``role``/``content``) tagged with a model name."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args)
+        self.model = kwargs.get('model')
+        self.finished = False
+        assert 'currency_symbol' not in kwargs
+
+        if not model_config:  # fill the shared pricing table once, on first construction
+            from .baidu_api import wenxin_model_config
+            from .doubao_api import doubao_model_config
+            from .openai_api import gpt_model_config
+            from .zhipuai_api import zhipuai_model_config
+            model_config.update({**wenxin_model_config, **doubao_model_config, **gpt_model_config, **zhipuai_model_config})
+
+    def __getitem__(self, index):
+        result = super().__getitem__(index)
+        if isinstance(index, slice):
+            return ChatMessages(result, model=self.model)  # slices keep the type and model
+        return result
+
+    def __add__(self, other):
+        if isinstance(other, list):
+            return ChatMessages(super().__add__(other), model=self.model)
+        return NotImplemented
+
+    def count_message_tokens(self):
+        return self.get_estimated_tokens()
+
+    def copy(self):
+        return ChatMessages(self, model=self.model)
+
+    def get_estimated_tokens(self):
+        """Roughly estimate tokens over every value of every message (role strings included)."""
+        num_tokens = 0
+        for message in self:
+            for value in message.values():
+                chinese_count, english_count, other_count = count_characters(value)
+                num_tokens += chinese_count // 2 + english_count // 5 + other_count // 2
+        return num_tokens
+
+    def get_prompt_messages_hash(self):
+        """Return the MD5 hex digest of ``prompt_messages`` serialized as key-sorted JSON."""
+        cache_string = json.dumps(self.prompt_messages, sort_keys=True)
+        return hashlib.md5(cache_string.encode()).hexdigest()
+
+    @property
+    def cost(self):
+        """Estimated cost: all but the last message priced as input, the last one as output."""
+        if len(self) == 0 or (self.model not in model_config and self.model not in model_prices):
+            return 0
+        input_tokens, output_tokens = self[:-1].count_message_tokens(), self[-1:].count_message_tokens()
+        if self.model in model_config:
+            pricing = model_config[self.model]["Pricing"]  # prices per 1,000 tokens
+            return pricing[0] * input_tokens / 1_000 + pricing[1] * output_tokens / 1_000
+        prices = model_prices[self.model]  # prices per single token
+        return prices["input_cost_per_token"] * input_tokens + prices["output_cost_per_token"] * output_tokens
+
+    @property
+    def response(self):
+        """Content of the last message if it is an assistant message; '' otherwise or when empty."""
+        return self[-1]['content'] if self and self[-1]['role'] == 'assistant' else ''
+
+    @property
+    def prompt_messages(self):
+        """The messages without the trailing assistant reply (kept as-is if that reply is empty)."""
+        return self[:-1] if self.response else self
+
+    @property
+    def currency_symbol(self):
+        return model_config[self.model]["currency_symbol"] if self.model in model_config else '$'
+
+    @property
+    def cost_info(self):
+        formatted_cost = f"{self.cost:.7f}".rstrip('0').rstrip('.')
+        return f"{self.model}: {formatted_cost}{self.currency_symbol}"
+
+    def print(self):
+        """Print every message: its role centred in a dashed rule, then its content."""
+        for message in self:
+            print(f"{message['role']}".center(100, '-') + '\n')
+            print(message['content'])
+            print()
diff --git a/llm_api/doubao_api.py b/llm_api/doubao_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..de917e3a61023ef2a0bd83eba68415812d30461a
--- /dev/null
+++ b/llm_api/doubao_api.py
@@ -0,0 +1,53 @@
+from openai import OpenAI
+from .chat_messages import ChatMessages
+
+doubao_model_config = {  # model name -> pricing; "Pricing" = (input, output) price per 1,000 tokens, see ChatMessages.cost
+    "doubao-lite-32k":{
+        "Pricing": (0.0003, 0.0006),
+        "currency_symbol": '￥',
+    },
+    "doubao-lite-128k":{
+        "Pricing": (0.0008, 0.001),
+        "currency_symbol": '￥',
+    },
+    "doubao-pro-32k":{
+        "Pricing": (0.0008, 0.002),
+        "currency_symbol": '￥',
+    },
+    "doubao-pro-128k":{
+        "Pricing": (0.005, 0.009),
+        "currency_symbol": '￥',
+    },
+}
+
+def stream_chat_with_doubao(messages, model='doubao-lite-32k', endpoint_id=None, response_json=False, api_key=None, max_tokens=32000):
+    """Stream a Doubao chat completion via the OpenAI client, yielding ``messages`` as it grows.
+
+    The request is addressed by ``endpoint_id`` (sent as the API ``model``). An empty
+    assistant message is appended and refilled with the accumulated text; ``messages``
+    is yielded after every chunk that has choices and returned at the end.
+    NOTE(review): ``model`` and ``max_tokens`` are accepted but unused in the request.
+    Raises ``Exception`` when ``api_key`` or ``endpoint_id`` is None.
+    """
+    if api_key is None:
+        raise Exception('未提供有效的 api_key！')
+    if endpoint_id is None:
+        raise Exception('未提供有效的 endpoint_id！')
+
+    client = OpenAI(api_key=api_key, base_url="https://ark.cn-beijing.volces.com/api/v3")
+    response_format = {"type": "json_object"} if response_json else None
+    stream = client.chat.completions.create(
+        model=endpoint_id, messages=messages, stream=True, response_format=response_format)
+
+    messages.append({'role': 'assistant', 'content': ''})
+    content = ''
+    for chunk in stream:
+        if not chunk.choices:
+            continue
+        content += chunk.choices[0].delta.content or ''
+        messages[-1]['content'] = content
+        yield messages
+    return messages
+
+if __name__ == '__main__':
+    pass  # placeholder: running this module as a script does nothing
diff --git a/llm_api/model_prices.json b/llm_api/model_prices.json
new file mode 100644
index 0000000000000000000000000000000000000000..35eda9dff7eaca17a1c3601f866d299cb2e0711b
--- /dev/null
+++ b/llm_api/model_prices.json
@@ -0,0 +1,6697 @@
+{
+    "gpt-4": {
+        "max_tokens": 4096,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-05,
+        "output_cost_per_token": 6e-05,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
+    },
+    "gpt-4o": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5e-06,
+        "output_cost_per_token": 1.5e-05,
+        "cache_read_input_token_cost": 1.25e-06,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "gpt-4o-audio-preview": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 2.5e-06,
+        "input_cost_per_audio_token": 0.0001,
+        "output_cost_per_token": 1e-05,
+        "output_cost_per_audio_token": 0.0002,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_audio_input": true,
+        "supports_audio_output": true
+    },
+    "gpt-4o-audio-preview-2024-10-01": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 2.5e-06,
+        "input_cost_per_audio_token": 0.0001,
+        "output_cost_per_token": 1e-05,
+        "output_cost_per_audio_token": 0.0002,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_audio_input": true,
+        "supports_audio_output": true
+    },
+    "gpt-4o-mini": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 1.5e-07,
+        "output_cost_per_token": 6e-07,
+        "cache_read_input_token_cost": 7.5e-08,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "gpt-4o-mini-2024-07-18": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 1.5e-07,
+        "output_cost_per_token": 6e-07,
+        "cache_read_input_token_cost": 7.5e-08,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "o1-mini": {
+        "max_tokens": 65536,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 65536,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.2e-05,
+        "cache_read_input_token_cost": 1.5e-06,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "o1-mini-2024-09-12": {
+        "max_tokens": 65536,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 65536,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.2e-05,
+        "cache_read_input_token_cost": 1.5e-06,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "o1-preview": {
+        "max_tokens": 32768,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 1.5e-05,
+        "output_cost_per_token": 6e-05,
+        "cache_read_input_token_cost": 7.5e-06,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "o1-preview-2024-09-12": {
+        "max_tokens": 32768,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 1.5e-05,
+        "output_cost_per_token": 6e-05,
+        "cache_read_input_token_cost": 7.5e-06,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "chatgpt-4o-latest": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "gpt-4o-2024-05-13": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "gpt-4o-2024-08-06": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 2.5e-06,
+        "output_cost_per_token": 1e-05,
+        "cache_read_input_token_cost": 1.25e-06,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "gpt-4-turbo-preview": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-05,
+        "output_cost_per_token": 3e-05,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
+    },
+    "gpt-4-0314": {
+        "max_tokens": 4096,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-05,
+        "output_cost_per_token": 6e-05,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_prompt_caching": true
+    },
+    "gpt-4-0613": {
+        "max_tokens": 4096,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-05,
+        "output_cost_per_token": 6e-05,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
+    },
+    "gpt-4-32k": {
+        "max_tokens": 4096,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 6e-05,
+        "output_cost_per_token": 0.00012,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_prompt_caching": true
+    },
+    "gpt-4-32k-0314": {
+        "max_tokens": 4096,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 6e-05,
+        "output_cost_per_token": 0.00012,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_prompt_caching": true
+    },
+    "gpt-4-32k-0613": {
+        "max_tokens": 4096,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 6e-05,
+        "output_cost_per_token": 0.00012,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_prompt_caching": true
+    },
+    "gpt-4-turbo": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-05,
+        "output_cost_per_token": 3e-05,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "gpt-4-turbo-2024-04-09": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-05,
+        "output_cost_per_token": 3e-05,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "gpt-4-1106-preview": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-05,
+        "output_cost_per_token": 3e-05,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
+    },
+    "gpt-4-0125-preview": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-05,
+        "output_cost_per_token": 3e-05,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
+    },
+    "gpt-4-vision-preview": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-05,
+        "output_cost_per_token": 3e-05,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "gpt-4-1106-vision-preview": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-05,
+        "output_cost_per_token": 3e-05,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "gpt-3.5-turbo": {
+        "max_tokens": 4097,
+        "max_input_tokens": 16385,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.5e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
+    },
+    "gpt-3.5-turbo-0301": {
+        "max_tokens": 4097,
+        "max_input_tokens": 4097,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.5e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_prompt_caching": true
+    },
+    "gpt-3.5-turbo-0613": {
+        "max_tokens": 4097,
+        "max_input_tokens": 4097,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.5e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
+    },
+    "gpt-3.5-turbo-1106": {
+        "max_tokens": 16385,
+        "max_input_tokens": 16385,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
+    },
+    "gpt-3.5-turbo-0125": {
+        "max_tokens": 16385,
+        "max_input_tokens": 16385,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5e-07,
+        "output_cost_per_token": 1.5e-06,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
+    },
+    "gpt-3.5-turbo-16k": {
+        "max_tokens": 16385,
+        "max_input_tokens": 16385,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 4e-06,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_prompt_caching": true
+    },
+    "gpt-3.5-turbo-16k-0613": {
+        "max_tokens": 16385,
+        "max_input_tokens": 16385,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 4e-06,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_prompt_caching": true
+    },
+    "ft:gpt-3.5-turbo": {
+        "max_tokens": 4096,
+        "max_input_tokens": 16385,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 6e-06,
+        "litellm_provider": "openai",
+        "mode": "chat"
+    },
+    "ft:gpt-3.5-turbo-0125": {
+        "max_tokens": 4096,
+        "max_input_tokens": 16385,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 6e-06,
+        "litellm_provider": "openai",
+        "mode": "chat"
+    },
+    "ft:gpt-3.5-turbo-1106": {
+        "max_tokens": 4096,
+        "max_input_tokens": 16385,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 6e-06,
+        "litellm_provider": "openai",
+        "mode": "chat"
+    },
+    "ft:gpt-3.5-turbo-0613": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 6e-06,
+        "litellm_provider": "openai",
+        "mode": "chat"
+    },
+    "ft:gpt-4-0613": {
+        "max_tokens": 4096,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-05,
+        "output_cost_per_token": 6e-05,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing"
+    },
+    "ft:gpt-4o-2024-08-06": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 3.75e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "ft:gpt-4o-mini-2024-07-18": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 3e-07,
+        "output_cost_per_token": 1.2e-06,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "ft:davinci-002": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 2e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "text-completion-openai",
+        "mode": "completion"
+    },
+    "ft:babbage-002": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 4e-07,
+        "output_cost_per_token": 4e-07,
+        "litellm_provider": "text-completion-openai",
+        "mode": "completion"
+    },
+    "text-embedding-3-large": {
+        "max_tokens": 8191,
+        "max_input_tokens": 8191,
+        "output_vector_size": 3072,
+        "input_cost_per_token": 1.3e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "openai",
+        "mode": "embedding"
+    },
+    "text-embedding-3-small": {
+        "max_tokens": 8191,
+        "max_input_tokens": 8191,
+        "output_vector_size": 1536,
+        "input_cost_per_token": 2e-08,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "openai",
+        "mode": "embedding"
+    },
+    "text-embedding-ada-002": {
+        "max_tokens": 8191,
+        "max_input_tokens": 8191,
+        "output_vector_size": 1536,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "openai",
+        "mode": "embedding"
+    },
+    "text-embedding-ada-002-v2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 8191,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "openai",
+        "mode": "embedding"
+    },
+    "text-moderation-stable": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 0,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "openai",
+        "mode": "moderations"
+    },
+    "text-moderation-007": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 0,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "openai",
+        "mode": "moderations"
+    },
+    "text-moderation-latest": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 0,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "openai",
+        "mode": "moderations"
+    },
+    "256-x-256/dall-e-2": {
+        "mode": "image_generation",
+        "input_cost_per_pixel": 2.4414e-07,
+        "output_cost_per_pixel": 0.0,
+        "litellm_provider": "openai"
+    },
+    "512-x-512/dall-e-2": {
+        "mode": "image_generation",
+        "input_cost_per_pixel": 6.86e-08,
+        "output_cost_per_pixel": 0.0,
+        "litellm_provider": "openai"
+    },
+    "1024-x-1024/dall-e-2": {
+        "mode": "image_generation",
+        "input_cost_per_pixel": 1.9e-08,
+        "output_cost_per_pixel": 0.0,
+        "litellm_provider": "openai"
+    },
+    "hd/1024-x-1792/dall-e-3": {
+        "mode": "image_generation",
+        "input_cost_per_pixel": 6.539e-08,
+        "output_cost_per_pixel": 0.0,
+        "litellm_provider": "openai"
+    },
+    "hd/1792-x-1024/dall-e-3": {
+        "mode": "image_generation",
+        "input_cost_per_pixel": 6.539e-08,
+        "output_cost_per_pixel": 0.0,
+        "litellm_provider": "openai"
+    },
+    "hd/1024-x-1024/dall-e-3": {
+        "mode": "image_generation",
+        "input_cost_per_pixel": 7.629e-08,
+        "output_cost_per_pixel": 0.0,
+        "litellm_provider": "openai"
+    },
+    "standard/1024-x-1792/dall-e-3": {
+        "mode": "image_generation",
+        "input_cost_per_pixel": 4.359e-08,
+        "output_cost_per_pixel": 0.0,
+        "litellm_provider": "openai"
+    },
+    "standard/1792-x-1024/dall-e-3": {
+        "mode": "image_generation",
+        "input_cost_per_pixel": 4.359e-08,
+        "output_cost_per_pixel": 0.0,
+        "litellm_provider": "openai"
+    },
+    "standard/1024-x-1024/dall-e-3": {
+        "mode": "image_generation",
+        "input_cost_per_pixel": 3.81469e-08,
+        "output_cost_per_pixel": 0.0,
+        "litellm_provider": "openai"
+    },
+    "whisper-1": {
+        "mode": "audio_transcription",
+        "input_cost_per_second": 0,
+        "output_cost_per_second": 0.0001,
+        "litellm_provider": "openai"
+    },
+    "tts-1": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 1.5e-05,
+        "litellm_provider": "openai"
+    },
+    "tts-1-hd": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 3e-05,
+        "litellm_provider": "openai"
+    },
+    "azure/tts-1": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 1.5e-05,
+        "litellm_provider": "azure"
+    },
+    "azure/tts-1-hd": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 3e-05,
+        "litellm_provider": "azure"
+    },
+    "azure/whisper-1": {
+        "mode": "audio_transcription",
+        "input_cost_per_second": 0,
+        "output_cost_per_second": 0.0001,
+        "litellm_provider": "azure"
+    },
+    "azure/o1-mini": {
+        "max_tokens": 65536,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 65536,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.2e-05,
+        "cache_read_input_token_cost": 1.5e-06,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "azure/o1-mini-2024-09-12": {
+        "max_tokens": 65536,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 65536,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.2e-05,
+        "cache_read_input_token_cost": 1.5e-06,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "azure/o1-preview": {
+        "max_tokens": 32768,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 1.5e-05,
+        "output_cost_per_token": 6e-05,
+        "cache_read_input_token_cost": 7.5e-06,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "azure/o1-preview-2024-09-12": {
+        "max_tokens": 32768,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 1.5e-05,
+        "output_cost_per_token": 6e-05,
+        "cache_read_input_token_cost": 7.5e-06,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "azure/gpt-4o": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5e-06,
+        "output_cost_per_token": 1.5e-05,
+        "cache_read_input_token_cost": 1.25e-06,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "azure/gpt-4o-2024-08-06": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 2.75e-06,
+        "output_cost_per_token": 1.1e-05,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "azure/gpt-4o-2024-05-13": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "azure/global-standard/gpt-4o-2024-08-06": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 2.5e-06,
+        "output_cost_per_token": 1e-05,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "azure/global-standard/gpt-4o-mini": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 1.5e-07,
+        "output_cost_per_token": 6e-07,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "azure/gpt-4o-mini": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 1.65e-07,
+        "output_cost_per_token": 6.6e-07,
+        "cache_read_input_token_cost": 7.5e-08,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "azure/gpt-4-turbo-2024-04-09": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-05,
+        "output_cost_per_token": 3e-05,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "azure/gpt-4-0125-preview": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-05,
+        "output_cost_per_token": 3e-05,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true
+    },
+    "azure/gpt-4-1106-preview": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-05,
+        "output_cost_per_token": 3e-05,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true
+    },
+    "azure/gpt-4-0613": {
+        "max_tokens": 4096,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-05,
+        "output_cost_per_token": 6e-05,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "azure/gpt-4-32k-0613": {
+        "max_tokens": 4096,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 6e-05,
+        "output_cost_per_token": 0.00012,
+        "litellm_provider": "azure",
+        "mode": "chat"
+    },
+    "azure/gpt-4-32k": {
+        "max_tokens": 4096,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 6e-05,
+        "output_cost_per_token": 0.00012,
+        "litellm_provider": "azure",
+        "mode": "chat"
+    },
+    "azure/gpt-4": {
+        "max_tokens": 4096,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-05,
+        "output_cost_per_token": 6e-05,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "azure/gpt-4-turbo": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-05,
+        "output_cost_per_token": 3e-05,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true
+    },
+    "azure/gpt-4-turbo-vision-preview": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-05,
+        "output_cost_per_token": 3e-05,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_vision": true
+    },
+    "azure/gpt-35-turbo-16k-0613": {
+        "max_tokens": 4096,
+        "max_input_tokens": 16385,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 4e-06,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "azure/gpt-35-turbo-1106": {
+        "max_tokens": 4096,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true
+    },
+    "azure/gpt-35-turbo-0613": {
+        "max_tokens": 4097,
+        "max_input_tokens": 4097,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.5e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true
+    },
+    "azure/gpt-35-turbo-0301": {
+        "max_tokens": 4097,
+        "max_input_tokens": 4097,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true
+    },
+    "azure/gpt-35-turbo-0125": {
+        "max_tokens": 4096,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5e-07,
+        "output_cost_per_token": 1.5e-06,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true
+    },
+    "azure/gpt-35-turbo-16k": {
+        "max_tokens": 4096,
+        "max_input_tokens": 16385,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 4e-06,
+        "litellm_provider": "azure",
+        "mode": "chat"
+    },
+    "azure/gpt-35-turbo": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4097,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5e-07,
+        "output_cost_per_token": 1.5e-06,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "azure/gpt-3.5-turbo-instruct-0914": {
+        "max_tokens": 4097,
+        "max_input_tokens": 4097,
+        "input_cost_per_token": 1.5e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "text-completion-openai",
+        "mode": "completion"
+    },
+    "azure/gpt-35-turbo-instruct": {
+        "max_tokens": 4097,
+        "max_input_tokens": 4097,
+        "input_cost_per_token": 1.5e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "text-completion-openai",
+        "mode": "completion"
+    },
+    "azure/gpt-35-turbo-instruct-0914": {
+        "max_tokens": 4097,
+        "max_input_tokens": 4097,
+        "input_cost_per_token": 1.5e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "text-completion-openai",
+        "mode": "completion"
+    },
+    "azure/mistral-large-latest": {
+        "max_tokens": 32000,
+        "max_input_tokens": 32000,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "azure/mistral-large-2402": {
+        "max_tokens": 32000,
+        "max_input_tokens": 32000,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "azure/command-r-plus": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "azure/ada": {
+        "max_tokens": 8191,
+        "max_input_tokens": 8191,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "azure",
+        "mode": "embedding"
+    },
+    "azure/text-embedding-ada-002": {
+        "max_tokens": 8191,
+        "max_input_tokens": 8191,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "azure",
+        "mode": "embedding"
+    },
+    "azure/text-embedding-3-large": {
+        "max_tokens": 8191,
+        "max_input_tokens": 8191,
+        "input_cost_per_token": 1.3e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "azure",
+        "mode": "embedding"
+    },
+    "azure/text-embedding-3-small": {
+        "max_tokens": 8191,
+        "max_input_tokens": 8191,
+        "input_cost_per_token": 2e-08,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "azure",
+        "mode": "embedding"
+    },
+    "azure/standard/1024-x-1024/dall-e-3": {
+        "input_cost_per_pixel": 3.81469e-08,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "azure",
+        "mode": "image_generation"
+    },
+    "azure/hd/1024-x-1024/dall-e-3": {
+        "input_cost_per_pixel": 7.629e-08,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "azure",
+        "mode": "image_generation"
+    },
+    "azure/standard/1024-x-1792/dall-e-3": {
+        "input_cost_per_pixel": 4.359e-08,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "azure",
+        "mode": "image_generation"
+    },
+    "azure/standard/1792-x-1024/dall-e-3": {
+        "input_cost_per_pixel": 4.359e-08,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "azure",
+        "mode": "image_generation"
+    },
+    "azure/hd/1024-x-1792/dall-e-3": {
+        "input_cost_per_pixel": 6.539e-08,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "azure",
+        "mode": "image_generation"
+    },
+    "azure/hd/1792-x-1024/dall-e-3": {
+        "input_cost_per_pixel": 6.539e-08,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "azure",
+        "mode": "image_generation"
+    },
+    "azure/standard/1024-x-1024/dall-e-2": {
+        "input_cost_per_pixel": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "azure",
+        "mode": "image_generation"
+    },
+    "azure_ai/jamba-instruct": {
+        "max_tokens": 4096,
+        "max_input_tokens": 70000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5e-07,
+        "output_cost_per_token": 7e-07,
+        "litellm_provider": "azure_ai",
+        "mode": "chat"
+    },
+    "azure_ai/mistral-large": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 4e-06,
+        "output_cost_per_token": 1.2e-05,
+        "litellm_provider": "azure_ai",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "azure_ai/mistral-small": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 3e-06,
+        "litellm_provider": "azure_ai",
+        "supports_function_calling": true,
+        "mode": "chat"
+    },
+    "azure_ai/Meta-Llama-3-70B-Instruct": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 1.1e-06,
+        "output_cost_per_token": 3.7e-07,
+        "litellm_provider": "azure_ai",
+        "mode": "chat"
+    },
+    "azure_ai/Meta-Llama-3.1-8B-Instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 3e-07,
+        "output_cost_per_token": 6.1e-07,
+        "litellm_provider": "azure_ai",
+        "mode": "chat",
+        "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-8b-instruct-offer?tab=PlansAndPrice"
+    },
+    "azure_ai/Meta-Llama-3.1-70B-Instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 2.68e-06,
+        "output_cost_per_token": 3.54e-06,
+        "litellm_provider": "azure_ai",
+        "mode": "chat",
+        "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-70b-instruct-offer?tab=PlansAndPrice"
+    },
+    "azure_ai/Meta-Llama-3.1-405B-Instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 5.33e-06,
+        "output_cost_per_token": 1.6e-05,
+        "litellm_provider": "azure_ai",
+        "mode": "chat",
+        "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice"
+    },
+    "azure_ai/cohere-rerank-v3-multilingual": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "max_query_tokens": 2048,
+        "input_cost_per_token": 0.0,
+        "input_cost_per_query": 0.002,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "azure_ai",
+        "mode": "rerank"
+    },
+    "azure_ai/cohere-rerank-v3-english": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "max_query_tokens": 2048,
+        "input_cost_per_token": 0.0,
+        "input_cost_per_query": 0.002,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "azure_ai",
+        "mode": "rerank"
+    },
+    "azure_ai/Cohere-embed-v3-english": {
+        "max_tokens": 512,
+        "max_input_tokens": 512,
+        "output_vector_size": 1024,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "azure_ai",
+        "mode": "embedding",
+        "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice"
+    },
+    "azure_ai/Cohere-embed-v3-multilingual": {
+        "max_tokens": 512,
+        "max_input_tokens": 512,
+        "output_vector_size": 1024,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "azure_ai",
+        "mode": "embedding",
+        "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice"
+    },
+    "babbage-002": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 4e-07,
+        "output_cost_per_token": 4e-07,
+        "litellm_provider": "text-completion-openai",
+        "mode": "completion"
+    },
+    "davinci-002": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 2e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "text-completion-openai",
+        "mode": "completion"
+    },
+    "gpt-3.5-turbo-instruct": {
+        "max_tokens": 4096,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.5e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "text-completion-openai",
+        "mode": "completion"
+    },
+    "gpt-3.5-turbo-instruct-0914": {
+        "max_tokens": 4097,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 4097,
+        "input_cost_per_token": 1.5e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "text-completion-openai",
+        "mode": "completion"
+    },
+    "claude-instant-1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 1.63e-06,
+        "output_cost_per_token": 5.51e-06,
+        "litellm_provider": "anthropic",
+        "mode": "chat"
+    },
+    "mistral/mistral-tiny": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 2.5e-07,
+        "output_cost_per_token": 2.5e-07,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_assistant_prefill": true
+    },
+    "mistral/mistral-small": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 3e-06,
+        "litellm_provider": "mistral",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_assistant_prefill": true
+    },
+    "mistral/mistral-small-latest": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 3e-06,
+        "litellm_provider": "mistral",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_assistant_prefill": true
+    },
+    "mistral/mistral-medium": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 2.7e-06,
+        "output_cost_per_token": 8.1e-06,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_assistant_prefill": true
+    },
+    "mistral/mistral-medium-latest": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 2.7e-06,
+        "output_cost_per_token": 8.1e-06,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_assistant_prefill": true
+    },
+    "mistral/mistral-medium-2312": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 2.7e-06,
+        "output_cost_per_token": 8.1e-06,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_assistant_prefill": true
+    },
+    "mistral/mistral-large-latest": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 9e-06,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_assistant_prefill": true
+    },
+    "mistral/mistral-large-2402": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 4e-06,
+        "output_cost_per_token": 1.2e-05,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_assistant_prefill": true
+    },
+    "mistral/mistral-large-2407": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 9e-06,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_assistant_prefill": true
+    },
+    "mistral/pixtral-12b-2409": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 1.5e-07,
+        "output_cost_per_token": 1.5e-07,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_assistant_prefill": true,
+        "supports_vision": true
+    },
+    "mistral/open-mistral-7b": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 2.5e-07,
+        "output_cost_per_token": 2.5e-07,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_assistant_prefill": true
+    },
+    "mistral/open-mixtral-8x7b": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 7e-07,
+        "output_cost_per_token": 7e-07,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_assistant_prefill": true
+    },
+    "mistral/open-mixtral-8x22b": {
+        "max_tokens": 8191,
+        "max_input_tokens": 64000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 2e-06,
+        "output_cost_per_token": 6e-06,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_assistant_prefill": true
+    },
+    "mistral/codestral-latest": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 3e-06,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_assistant_prefill": true
+    },
+    "mistral/codestral-2405": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 3e-06,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_assistant_prefill": true
+    },
+    "mistral/open-mistral-nemo": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 3e-07,
+        "output_cost_per_token": 3e-07,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "source": "https://mistral.ai/technology/",
+        "supports_assistant_prefill": true
+    },
+    "mistral/open-mistral-nemo-2407": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 3e-07,
+        "output_cost_per_token": 3e-07,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "source": "https://mistral.ai/technology/",
+        "supports_assistant_prefill": true
+    },
+    "mistral/open-codestral-mamba": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 2.5e-07,
+        "output_cost_per_token": 2.5e-07,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "source": "https://mistral.ai/technology/",
+        "supports_assistant_prefill": true
+    },
+    "mistral/codestral-mamba-latest": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 2.5e-07,
+        "output_cost_per_token": 2.5e-07,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "source": "https://mistral.ai/technology/",
+        "supports_assistant_prefill": true
+    },
+    "mistral/mistral-embed": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "input_cost_per_token": 1e-07,
+        "litellm_provider": "mistral",
+        "mode": "embedding"
+    },
+    "deepseek-chat": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.4e-07,
+        "input_cost_per_token_cache_hit": 1.4e-08,
+        "output_cost_per_token": 2.8e-07,
+        "litellm_provider": "deepseek",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_assistant_prefill": true,
+        "supports_tool_choice": true,
+        "supports_prompt_caching": true
+    },
+    "codestral/codestral-latest": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "codestral",
+        "mode": "chat",
+        "source": "https://docs.mistral.ai/capabilities/code_generation/",
+        "supports_assistant_prefill": true
+    },
+    "codestral/codestral-2405": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "codestral",
+        "mode": "chat",
+        "source": "https://docs.mistral.ai/capabilities/code_generation/",
+        "supports_assistant_prefill": true
+    },
+    "text-completion-codestral/codestral-latest": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "text-completion-codestral",
+        "mode": "completion",
+        "source": "https://docs.mistral.ai/capabilities/code_generation/"
+    },
+    "text-completion-codestral/codestral-2405": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "text-completion-codestral",
+        "mode": "completion",
+        "source": "https://docs.mistral.ai/capabilities/code_generation/"
+    },
+    "deepseek-coder": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.4e-07,
+        "input_cost_per_token_cache_hit": 1.4e-08,
+        "output_cost_per_token": 2.8e-07,
+        "litellm_provider": "deepseek",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_assistant_prefill": true,
+        "supports_tool_choice": true,
+        "supports_prompt_caching": true
+    },
+    "groq/llama2-70b-4096": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 7e-07,
+        "output_cost_per_token": 8e-07,
+        "litellm_provider": "groq",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "groq/llama3-8b-8192": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 5e-08,
+        "output_cost_per_token": 8e-08,
+        "litellm_provider": "groq",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "groq/llama3-70b-8192": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 5.9e-07,
+        "output_cost_per_token": 7.9e-07,
+        "litellm_provider": "groq",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "groq/llama-3.1-8b-instant": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 5e-08,
+        "output_cost_per_token": 8e-08,
+        "litellm_provider": "groq",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "groq/llama-3.1-70b-versatile": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 5.9e-07,
+        "output_cost_per_token": 7.9e-07,
+        "litellm_provider": "groq",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "groq/llama-3.1-405b-reasoning": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 5.9e-07,
+        "output_cost_per_token": 7.9e-07,
+        "litellm_provider": "groq",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "groq/mixtral-8x7b-32768": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 2.4e-07,
+        "output_cost_per_token": 2.4e-07,
+        "litellm_provider": "groq",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "groq/gemma-7b-it": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 7e-08,
+        "output_cost_per_token": 7e-08,
+        "litellm_provider": "groq",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "groq/gemma2-9b-it": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 2e-07,
+        "litellm_provider": "groq",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "groq/llama3-groq-70b-8192-tool-use-preview": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 8.9e-07,
+        "output_cost_per_token": 8.9e-07,
+        "litellm_provider": "groq",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "groq/llama3-groq-8b-8192-tool-use-preview": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 1.9e-07,
+        "output_cost_per_token": 1.9e-07,
+        "litellm_provider": "groq",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "cerebras/llama3.1-8b": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 1e-07,
+        "litellm_provider": "cerebras",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "cerebras/llama3.1-70b": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 6e-07,
+        "output_cost_per_token": 6e-07,
+        "litellm_provider": "cerebras",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "friendliai/mixtral-8x7b-instruct-v0-1": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 4e-07,
+        "output_cost_per_token": 4e-07,
+        "litellm_provider": "friendliai",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "friendliai/meta-llama-3-8b-instruct": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 1e-07,
+        "litellm_provider": "friendliai",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "friendliai/meta-llama-3-70b-instruct": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 8e-07,
+        "output_cost_per_token": 8e-07,
+        "litellm_provider": "friendliai",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "claude-instant-1.2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 1.63e-07,
+        "output_cost_per_token": 5.51e-07,
+        "litellm_provider": "anthropic",
+        "mode": "chat"
+    },
+    "claude-2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "anthropic",
+        "mode": "chat"
+    },
+    "claude-2.1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "anthropic",
+        "mode": "chat"
+    },
+    "claude-3-haiku-20240307": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 2.5e-07,
+        "output_cost_per_token": 1.25e-06,
+        "cache_creation_input_token_cost": 3e-07,
+        "cache_read_input_token_cost": 3e-08,
+        "litellm_provider": "anthropic",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 264,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true
+    },
+    "claude-3-haiku-latest": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 2.5e-07,
+        "output_cost_per_token": 1.25e-06,
+        "cache_creation_input_token_cost": 3e-07,
+        "cache_read_input_token_cost": 3e-08,
+        "litellm_provider": "anthropic",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 264,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true
+    },
+    "claude-3-opus-20240229": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.5e-05,
+        "output_cost_per_token": 7.5e-05,
+        "cache_creation_input_token_cost": 1.875e-05,
+        "cache_read_input_token_cost": 1.5e-06,
+        "litellm_provider": "anthropic",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 395,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true
+    },
+    "claude-3-opus-latest": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.5e-05,
+        "output_cost_per_token": 7.5e-05,
+        "cache_creation_input_token_cost": 1.875e-05,
+        "cache_read_input_token_cost": 1.5e-06,
+        "litellm_provider": "anthropic",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 395,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true
+    },
+    "claude-3-sonnet-20240229": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "anthropic",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 159,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true
+    },
+    "claude-3-5-sonnet-20240620": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "cache_creation_input_token_cost": 3.75e-06,
+        "cache_read_input_token_cost": 3e-07,
+        "litellm_provider": "anthropic",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 159,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true
+    },
+    "claude-3-5-sonnet-20241022": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "cache_creation_input_token_cost": 3.75e-06,
+        "cache_read_input_token_cost": 3e-07,
+        "litellm_provider": "anthropic",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 159,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true
+    },
+    "claude-3-5-sonnet-latest": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "cache_creation_input_token_cost": 3.75e-06,
+        "cache_read_input_token_cost": 3e-07,
+        "litellm_provider": "anthropic",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 159,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true
+    },
+    "text-bison": {
+        "max_tokens": 2048,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 2048,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "text-bison@001": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "text-bison@002": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "text-bison32k": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "text-bison32k@002": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "text-unicorn": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 1e-05,
+        "output_cost_per_token": 2.8e-05,
+        "litellm_provider": "vertex_ai-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "text-unicorn@001": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 1e-05,
+        "output_cost_per_token": 2.8e-05,
+        "litellm_provider": "vertex_ai-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "chat-bison": {
+        "max_tokens": 4096,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "chat-bison@001": {
+        "max_tokens": 4096,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "chat-bison@002": {
+        "max_tokens": 4096,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "chat-bison-32k": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "chat-bison-32k@002": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "code-bison": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-code-text-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "code-bison@001": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-code-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "code-bison@002": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-code-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "code-bison32k": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-code-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "code-bison-32k@002": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-code-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "code-gecko@001": {
+        "max_tokens": 64,
+        "max_input_tokens": 2048,
+        "max_output_tokens": 64,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "litellm_provider": "vertex_ai-code-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "code-gecko@002": {
+        "max_tokens": 64,
+        "max_input_tokens": 2048,
+        "max_output_tokens": 64,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "litellm_provider": "vertex_ai-code-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "code-gecko": {
+        "max_tokens": 64,
+        "max_input_tokens": 2048,
+        "max_output_tokens": 64,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "litellm_provider": "vertex_ai-code-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "code-gecko-latest": {
+        "max_tokens": 64,
+        "max_input_tokens": 2048,
+        "max_output_tokens": 64,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "litellm_provider": "vertex_ai-code-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "codechat-bison@latest": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-code-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "codechat-bison": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-code-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "codechat-bison@001": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-code-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "codechat-bison@002": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-code-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "codechat-bison-32k": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-code-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "codechat-bison-32k@002": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "input_cost_per_character": 2.5e-07,
+        "output_cost_per_character": 5e-07,
+        "litellm_provider": "vertex_ai-code-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini-pro": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32760,
+        "max_output_tokens": 8192,
+        "input_cost_per_image": 0.0025,
+        "input_cost_per_video_per_second": 0.002,
+        "input_cost_per_token": 5e-07,
+        "input_cost_per_character": 1.25e-07,
+        "output_cost_per_token": 1.5e-06,
+        "output_cost_per_character": 3.75e-07,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
+    },
+    "gemini-1.0-pro": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32760,
+        "max_output_tokens": 8192,
+        "input_cost_per_image": 0.0025,
+        "input_cost_per_video_per_second": 0.002,
+        "input_cost_per_token": 5e-07,
+        "input_cost_per_character": 1.25e-07,
+        "output_cost_per_token": 1.5e-06,
+        "output_cost_per_character": 3.75e-07,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models"
+    },
+    "gemini-1.0-pro-001": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32760,
+        "max_output_tokens": 8192,
+        "input_cost_per_image": 0.0025,
+        "input_cost_per_video_per_second": 0.002,
+        "input_cost_per_token": 5e-07,
+        "input_cost_per_character": 1.25e-07,
+        "output_cost_per_token": 1.5e-06,
+        "output_cost_per_character": 3.75e-07,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini-1.0-ultra": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 2048,
+        "input_cost_per_image": 0.0025,
+        "input_cost_per_video_per_second": 0.002,
+        "input_cost_per_token": 5e-07,
+        "input_cost_per_character": 1.25e-07,
+        "output_cost_per_token": 1.5e-06,
+        "output_cost_per_character": 3.75e-07,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini-1.0-ultra-001": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 2048,
+        "input_cost_per_image": 0.0025,
+        "input_cost_per_video_per_second": 0.002,
+        "input_cost_per_token": 5e-07,
+        "input_cost_per_character": 1.25e-07,
+        "output_cost_per_token": 1.5e-06,
+        "output_cost_per_character": 3.75e-07,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini-1.0-pro-002": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32760,
+        "max_output_tokens": 8192,
+        "input_cost_per_image": 0.0025,
+        "input_cost_per_video_per_second": 0.002,
+        "input_cost_per_token": 5e-07,
+        "input_cost_per_character": 1.25e-07,
+        "output_cost_per_token": 1.5e-06,
+        "output_cost_per_character": 3.75e-07,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini-1.5-pro": {
+        "max_tokens": 8192,
+        "max_input_tokens": 2097152,
+        "max_output_tokens": 8192,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 3.125e-05,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 7.8125e-08,
+        "input_cost_per_character": 3.125e-07,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-05,
+        "input_cost_per_token_above_128k_tokens": 1.5625e-07,
+        "input_cost_per_character_above_128k_tokens": 6.25e-07,
+        "output_cost_per_token": 3.125e-07,
+        "output_cost_per_character": 1.25e-06,
+        "output_cost_per_token_above_128k_tokens": 6.25e-07,
+        "output_cost_per_character_above_128k_tokens": 2.5e-06,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_tool_choice": true,
+        "supports_response_schema": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini-1.5-pro-002": {
+        "max_tokens": 8192,
+        "max_input_tokens": 2097152,
+        "max_output_tokens": 8192,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 3.125e-05,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 7.8125e-08,
+        "input_cost_per_character": 3.125e-07,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-05,
+        "input_cost_per_token_above_128k_tokens": 1.5625e-07,
+        "input_cost_per_character_above_128k_tokens": 6.25e-07,
+        "output_cost_per_token": 3.125e-07,
+        "output_cost_per_character": 1.25e-06,
+        "output_cost_per_token_above_128k_tokens": 6.25e-07,
+        "output_cost_per_character_above_128k_tokens": 2.5e-06,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_tool_choice": true,
+        "supports_response_schema": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-pro"
+    },
+    "gemini-1.5-pro-001": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 3.125e-05,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 7.8125e-08,
+        "input_cost_per_character": 3.125e-07,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-05,
+        "input_cost_per_token_above_128k_tokens": 1.5625e-07,
+        "input_cost_per_character_above_128k_tokens": 6.25e-07,
+        "output_cost_per_token": 3.125e-07,
+        "output_cost_per_character": 1.25e-06,
+        "output_cost_per_token_above_128k_tokens": 6.25e-07,
+        "output_cost_per_character_above_128k_tokens": 2.5e-06,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_tool_choice": true,
+        "supports_response_schema": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini-1.5-pro-preview-0514": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 3.125e-05,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 7.8125e-08,
+        "input_cost_per_character": 3.125e-07,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-05,
+        "input_cost_per_token_above_128k_tokens": 1.5625e-07,
+        "input_cost_per_character_above_128k_tokens": 6.25e-07,
+        "output_cost_per_token": 3.125e-07,
+        "output_cost_per_character": 1.25e-06,
+        "output_cost_per_token_above_128k_tokens": 6.25e-07,
+        "output_cost_per_character_above_128k_tokens": 2.5e-06,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_tool_choice": true,
+        "supports_response_schema": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini-1.5-pro-preview-0215": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 3.125e-05,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 7.8125e-08,
+        "input_cost_per_character": 3.125e-07,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-05,
+        "input_cost_per_token_above_128k_tokens": 1.5625e-07,
+        "input_cost_per_character_above_128k_tokens": 6.25e-07,
+        "output_cost_per_token": 3.125e-07,
+        "output_cost_per_character": 1.25e-06,
+        "output_cost_per_token_above_128k_tokens": 6.25e-07,
+        "output_cost_per_character_above_128k_tokens": 2.5e-06,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_tool_choice": true,
+        "supports_response_schema": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini-1.5-pro-preview-0409": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 3.125e-05,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 7.8125e-08,
+        "input_cost_per_character": 3.125e-07,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-05,
+        "input_cost_per_token_above_128k_tokens": 1.5625e-07,
+        "input_cost_per_character_above_128k_tokens": 6.25e-07,
+        "output_cost_per_token": 3.125e-07,
+        "output_cost_per_character": 1.25e-06,
+        "output_cost_per_token_above_128k_tokens": 6.25e-07,
+        "output_cost_per_character_above_128k_tokens": 2.5e-06,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": true,
+        "supports_response_schema": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini-1.5-flash": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 2e-05,
+        "input_cost_per_video_per_second": 2e-05,
+        "input_cost_per_audio_per_second": 2e-06,
+        "input_cost_per_token": 4.688e-09,
+        "input_cost_per_character": 1.875e-08,
+        "input_cost_per_token_above_128k_tokens": 1e-06,
+        "input_cost_per_character_above_128k_tokens": 2.5e-07,
+        "input_cost_per_image_above_128k_tokens": 4e-05,
+        "input_cost_per_video_per_second_above_128k_tokens": 4e-05,
+        "input_cost_per_audio_per_second_above_128k_tokens": 4e-06,
+        "output_cost_per_token": 4.6875e-09,
+        "output_cost_per_character": 1.875e-08,
+        "output_cost_per_token_above_128k_tokens": 9.375e-09,
+        "output_cost_per_character_above_128k_tokens": 3.75e-08,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini-1.5-flash-exp-0827": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 2e-05,
+        "input_cost_per_video_per_second": 2e-05,
+        "input_cost_per_audio_per_second": 2e-06,
+        "input_cost_per_token": 4.688e-09,
+        "input_cost_per_character": 1.875e-08,
+        "input_cost_per_token_above_128k_tokens": 1e-06,
+        "input_cost_per_character_above_128k_tokens": 2.5e-07,
+        "input_cost_per_image_above_128k_tokens": 4e-05,
+        "input_cost_per_video_per_second_above_128k_tokens": 4e-05,
+        "input_cost_per_audio_per_second_above_128k_tokens": 4e-06,
+        "output_cost_per_token": 4.6875e-09,
+        "output_cost_per_character": 1.875e-08,
+        "output_cost_per_token_above_128k_tokens": 9.375e-09,
+        "output_cost_per_character_above_128k_tokens": 3.75e-08,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini-1.5-flash-002": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 2e-05,
+        "input_cost_per_video_per_second": 2e-05,
+        "input_cost_per_audio_per_second": 2e-06,
+        "input_cost_per_token": 4.688e-09,
+        "input_cost_per_character": 1.875e-08,
+        "input_cost_per_token_above_128k_tokens": 1e-06,
+        "input_cost_per_character_above_128k_tokens": 2.5e-07,
+        "input_cost_per_image_above_128k_tokens": 4e-05,
+        "input_cost_per_video_per_second_above_128k_tokens": 4e-05,
+        "input_cost_per_audio_per_second_above_128k_tokens": 4e-06,
+        "output_cost_per_token": 4.6875e-09,
+        "output_cost_per_character": 1.875e-08,
+        "output_cost_per_token_above_128k_tokens": 9.375e-09,
+        "output_cost_per_character_above_128k_tokens": 3.75e-08,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-flash"
+    },
+    "gemini-1.5-flash-001": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 2e-05,
+        "input_cost_per_video_per_second": 2e-05,
+        "input_cost_per_audio_per_second": 2e-06,
+        "input_cost_per_token": 4.688e-09,
+        "input_cost_per_character": 1.875e-08,
+        "input_cost_per_token_above_128k_tokens": 1e-06,
+        "input_cost_per_character_above_128k_tokens": 2.5e-07,
+        "input_cost_per_image_above_128k_tokens": 4e-05,
+        "input_cost_per_video_per_second_above_128k_tokens": 4e-05,
+        "input_cost_per_audio_per_second_above_128k_tokens": 4e-06,
+        "output_cost_per_token": 4.6875e-09,
+        "output_cost_per_character": 1.875e-08,
+        "output_cost_per_token_above_128k_tokens": 9.375e-09,
+        "output_cost_per_character_above_128k_tokens": 3.75e-08,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini-1.5-flash-preview-0514": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 2e-05,
+        "input_cost_per_video_per_second": 2e-05,
+        "input_cost_per_audio_per_second": 2e-06,
+        "input_cost_per_token": 4.688e-09,
+        "input_cost_per_character": 1.875e-08,
+        "input_cost_per_token_above_128k_tokens": 1e-06,
+        "input_cost_per_character_above_128k_tokens": 2.5e-07,
+        "input_cost_per_image_above_128k_tokens": 4e-05,
+        "input_cost_per_video_per_second_above_128k_tokens": 4e-05,
+        "input_cost_per_audio_per_second_above_128k_tokens": 4e-06,
+        "output_cost_per_token": 4.6875e-09,
+        "output_cost_per_character": 1.875e-08,
+        "output_cost_per_token_above_128k_tokens": 9.375e-09,
+        "output_cost_per_character_above_128k_tokens": 3.75e-08,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini-pro-experimental": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0,
+        "output_cost_per_token": 0,
+        "input_cost_per_character": 0,
+        "output_cost_per_character": 0,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_function_calling": false,
+        "supports_tool_choice": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/gemini-experimental"
+    },
+    "gemini-flash-experimental": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0,
+        "output_cost_per_token": 0,
+        "input_cost_per_character": 0,
+        "output_cost_per_character": 0,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_function_calling": false,
+        "supports_tool_choice": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/gemini-experimental"
+    },
+    "gemini-pro-vision": {
+        "max_tokens": 2048,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 2048,
+        "max_images_per_prompt": 16,
+        "max_videos_per_prompt": 1,
+        "max_video_length": 2,
+        "input_cost_per_token": 2.5e-07,
+        "output_cost_per_token": 5e-07,
+        "litellm_provider": "vertex_ai-vision-models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini-1.0-pro-vision": {
+        "max_tokens": 2048,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 2048,
+        "max_images_per_prompt": 16,
+        "max_videos_per_prompt": 1,
+        "max_video_length": 2,
+        "input_cost_per_token": 2.5e-07,
+        "output_cost_per_token": 5e-07,
+        "litellm_provider": "vertex_ai-vision-models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini-1.0-pro-vision-001": {
+        "max_tokens": 2048,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 2048,
+        "max_images_per_prompt": 16,
+        "max_videos_per_prompt": 1,
+        "max_video_length": 2,
+        "input_cost_per_token": 2.5e-07,
+        "output_cost_per_token": 5e-07,
+        "litellm_provider": "vertex_ai-vision-models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "medlm-medium": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 8192,
+        "input_cost_per_character": 5e-07,
+        "output_cost_per_character": 1e-06,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "medlm-large": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_character": 5e-06,
+        "output_cost_per_character": 1.5e-05,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "vertex_ai/claude-3-sonnet@20240229": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "vertex_ai-anthropic_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true
+    },
+    "vertex_ai/claude-3-5-sonnet@20240620": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "vertex_ai-anthropic_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true
+    },
+    "vertex_ai/claude-3-5-sonnet-v2@20241022": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "vertex_ai-anthropic_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true
+    },
+    "vertex_ai/claude-3-haiku@20240307": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 2.5e-07,
+        "output_cost_per_token": 1.25e-06,
+        "litellm_provider": "vertex_ai-anthropic_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true
+    },
+    "vertex_ai/claude-3-opus@20240229": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.5e-05,
+        "output_cost_per_token": 7.5e-05,
+        "litellm_provider": "vertex_ai-anthropic_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true
+    },
+    "vertex_ai/meta/llama3-405b-instruct-maas": {
+        "max_tokens": 32000,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 32000,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "vertex_ai-llama_models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
+    },
+    "vertex_ai/meta/llama3-70b-instruct-maas": {
+        "max_tokens": 32000,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 32000,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "vertex_ai-llama_models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
+    },
+    "vertex_ai/meta/llama3-8b-instruct-maas": {
+        "max_tokens": 32000,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 32000,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "vertex_ai-llama_models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
+    },
+    "vertex_ai/meta/llama-3.2-90b-vision-instruct-maas": {
+        "max_tokens": 8192,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "vertex_ai-llama_models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas"
+    },
+    "vertex_ai/mistral-large@latest": {
+        "max_tokens": 8191,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 9e-06,
+        "litellm_provider": "vertex_ai-mistral_models",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "vertex_ai/mistral-large@2407": {
+        "max_tokens": 8191,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 9e-06,
+        "litellm_provider": "vertex_ai-mistral_models",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "vertex_ai/mistral-nemo@latest": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 3e-06,
+        "litellm_provider": "vertex_ai-mistral_models",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "vertex_ai/jamba-1.5-mini@001": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 4e-07,
+        "litellm_provider": "vertex_ai-ai21_models",
+        "mode": "chat"
+    },
+    "vertex_ai/jamba-1.5-large@001": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 2e-06,
+        "output_cost_per_token": 8e-06,
+        "litellm_provider": "vertex_ai-ai21_models",
+        "mode": "chat"
+    },
+    "vertex_ai/jamba-1.5": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 4e-07,
+        "litellm_provider": "vertex_ai-ai21_models",
+        "mode": "chat"
+    },
+    "vertex_ai/jamba-1.5-mini": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 4e-07,
+        "litellm_provider": "vertex_ai-ai21_models",
+        "mode": "chat"
+    },
+    "vertex_ai/jamba-1.5-large": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 2e-06,
+        "output_cost_per_token": 8e-06,
+        "litellm_provider": "vertex_ai-ai21_models",
+        "mode": "chat"
+    },
+    "vertex_ai/mistral-nemo@2407": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 3e-06,
+        "litellm_provider": "vertex_ai-mistral_models",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "vertex_ai/codestral@latest": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 3e-06,
+        "litellm_provider": "vertex_ai-mistral_models",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "vertex_ai/codestral@2405": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 3e-06,
+        "litellm_provider": "vertex_ai-mistral_models",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "vertex_ai/imagegeneration@006": {
+        "cost_per_image": 0.02,
+        "litellm_provider": "vertex_ai-image-models",
+        "mode": "image_generation",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
+    },
+    "vertex_ai/imagen-3.0-generate-001": {
+        "cost_per_image": 0.04,
+        "litellm_provider": "vertex_ai-image-models",
+        "mode": "image_generation",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
+    },
+    "vertex_ai/imagen-3.0-fast-generate-001": {
+        "cost_per_image": 0.02,
+        "litellm_provider": "vertex_ai-image-models",
+        "mode": "image_generation",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
+    },
+    "text-embedding-004": {
+        "max_tokens": 3072,
+        "max_input_tokens": 3072,
+        "output_vector_size": 768,
+        "input_cost_per_token": 6.25e-09,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
+    },
+    "text-multilingual-embedding-002": {
+        "max_tokens": 2048,
+        "max_input_tokens": 2048,
+        "output_vector_size": 768,
+        "input_cost_per_token": 6.25e-09,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
+    },
+    "textembedding-gecko": {
+        "max_tokens": 3072,
+        "max_input_tokens": 3072,
+        "output_vector_size": 768,
+        "input_cost_per_token": 6.25e-09,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "textembedding-gecko-multilingual": {
+        "max_tokens": 3072,
+        "max_input_tokens": 3072,
+        "output_vector_size": 768,
+        "input_cost_per_token": 6.25e-09,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "textembedding-gecko-multilingual@001": {
+        "max_tokens": 3072,
+        "max_input_tokens": 3072,
+        "output_vector_size": 768,
+        "input_cost_per_token": 6.25e-09,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "textembedding-gecko@001": {
+        "max_tokens": 3072,
+        "max_input_tokens": 3072,
+        "output_vector_size": 768,
+        "input_cost_per_token": 6.25e-09,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "textembedding-gecko@003": {
+        "max_tokens": 3072,
+        "max_input_tokens": 3072,
+        "output_vector_size": 768,
+        "input_cost_per_token": 6.25e-09,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "text-embedding-preview-0409": {
+        "max_tokens": 3072,
+        "max_input_tokens": 3072,
+        "output_vector_size": 768,
+        "input_cost_per_token": 6.25e-09,
+        "input_cost_per_token_batch_requests": 5e-09,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
+    },
+    "text-multilingual-embedding-preview-0409": {
+        "max_tokens": 3072,
+        "max_input_tokens": 3072,
+        "output_vector_size": 768,
+        "input_cost_per_token": 6.25e-09,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "palm/chat-bison": {
+        "max_tokens": 4096,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "litellm_provider": "palm",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "palm/chat-bison-001": {
+        "max_tokens": 4096,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "litellm_provider": "palm",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "palm/text-bison": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "litellm_provider": "palm",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "palm/text-bison-001": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "litellm_provider": "palm",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "palm/text-bison-safety-off": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "litellm_provider": "palm",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "palm/text-bison-safety-recitation-off": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 1.25e-07,
+        "litellm_provider": "palm",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini/gemini-1.5-flash-002": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_token": 7.5e-08,
+        "input_cost_per_token_above_128k_tokens": 1.5e-07,
+        "output_cost_per_token": 3e-07,
+        "output_cost_per_token_above_128k_tokens": 6e-07,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_prompt_caching": true,
+        "source": "https://ai.google.dev/pricing"
+    },
+    "gemini/gemini-1.5-flash-001": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_token": 7.5e-08,
+        "input_cost_per_token_above_128k_tokens": 1.5e-07,
+        "output_cost_per_token": 3e-07,
+        "output_cost_per_token_above_128k_tokens": 6e-07,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_prompt_caching": true,
+        "source": "https://ai.google.dev/pricing"
+    },
+    "gemini/gemini-1.5-flash": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_token": 7.5e-08,
+        "input_cost_per_token_above_128k_tokens": 1.5e-07,
+        "output_cost_per_token": 3e-07,
+        "output_cost_per_token_above_128k_tokens": 6e-07,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "source": "https://ai.google.dev/pricing"
+    },
+    "gemini/gemini-1.5-flash-latest": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_token": 7.5e-08,
+        "input_cost_per_token_above_128k_tokens": 1.5e-07,
+        "output_cost_per_token": 3e-07,
+        "output_cost_per_token_above_128k_tokens": 6e-07,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "source": "https://ai.google.dev/pricing"
+    },
+    "gemini/gemini-1.5-flash-8b-exp-0924": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_token": 0,
+        "input_cost_per_token_above_128k_tokens": 0,
+        "output_cost_per_token": 0,
+        "output_cost_per_token_above_128k_tokens": 0,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "source": "https://ai.google.dev/pricing"
+    },
+    "gemini/gemini-1.5-flash-exp-0827": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_token": 0,
+        "input_cost_per_token_above_128k_tokens": 0,
+        "output_cost_per_token": 0,
+        "output_cost_per_token_above_128k_tokens": 0,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "source": "https://ai.google.dev/pricing"
+    },
+    "gemini/gemini-1.5-flash-8b-exp-0827": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_token": 0,
+        "input_cost_per_token_above_128k_tokens": 0,
+        "output_cost_per_token": 0,
+        "output_cost_per_token_above_128k_tokens": 0,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://ai.google.dev/pricing"
+    },
+    "gemini/gemini-pro": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32760,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3.5e-07,
+        "input_cost_per_token_above_128k_tokens": 7e-07,
+        "output_cost_per_token": 1.05e-06,
+        "output_cost_per_token_above_128k_tokens": 2.1e-06,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini/gemini-1.5-pro": {
+        "max_tokens": 8192,
+        "max_input_tokens": 2097152,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3.5e-06,
+        "input_cost_per_token_above_128k_tokens": 7e-06,
+        "output_cost_per_token": 1.05e-05,
+        "output_cost_per_token_above_128k_tokens": 2.1e-05,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_tool_choice": true,
+        "supports_response_schema": true,
+        "source": "https://ai.google.dev/pricing"
+    },
+    "gemini/gemini-1.5-pro-002": {
+        "max_tokens": 8192,
+        "max_input_tokens": 2097152,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3.5e-06,
+        "input_cost_per_token_above_128k_tokens": 7e-06,
+        "output_cost_per_token": 1.05e-05,
+        "output_cost_per_token_above_128k_tokens": 2.1e-05,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_tool_choice": true,
+        "supports_response_schema": true,
+        "supports_prompt_caching": true,
+        "source": "https://ai.google.dev/pricing"
+    },
+    "gemini/gemini-1.5-pro-001": {
+        "max_tokens": 8192,
+        "max_input_tokens": 2097152,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3.5e-06,
+        "input_cost_per_token_above_128k_tokens": 7e-06,
+        "output_cost_per_token": 1.05e-05,
+        "output_cost_per_token_above_128k_tokens": 2.1e-05,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_tool_choice": true,
+        "supports_response_schema": true,
+        "supports_prompt_caching": true,
+        "source": "https://ai.google.dev/pricing"
+    },
+    "gemini/gemini-1.5-pro-exp-0801": {
+        "max_tokens": 8192,
+        "max_input_tokens": 2097152,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3.5e-06,
+        "input_cost_per_token_above_128k_tokens": 7e-06,
+        "output_cost_per_token": 1.05e-05,
+        "output_cost_per_token_above_128k_tokens": 2.1e-05,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_tool_choice": true,
+        "supports_response_schema": true,
+        "source": "https://ai.google.dev/pricing"
+    },
+    "gemini/gemini-1.5-pro-exp-0827": {
+        "max_tokens": 8192,
+        "max_input_tokens": 2097152,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0,
+        "input_cost_per_token_above_128k_tokens": 0,
+        "output_cost_per_token": 0,
+        "output_cost_per_token_above_128k_tokens": 0,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_tool_choice": true,
+        "supports_response_schema": true,
+        "source": "https://ai.google.dev/pricing"
+    },
+    "gemini/gemini-1.5-pro-latest": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3.5e-06,
+        "input_cost_per_token_above_128k_tokens": 7e-06,
+        "output_cost_per_token": 1.05e-06,
+        "output_cost_per_token_above_128k_tokens": 2.1e-05,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_tool_choice": true,
+        "supports_response_schema": true,
+        "source": "https://ai.google.dev/pricing"
+    },
+    "gemini/gemini-pro-vision": {
+        "max_tokens": 2048,
+        "max_input_tokens": 30720,
+        "max_output_tokens": 2048,
+        "input_cost_per_token": 3.5e-07,
+        "input_cost_per_token_above_128k_tokens": 7e-07,
+        "output_cost_per_token": 1.05e-06,
+        "output_cost_per_token_above_128k_tokens": 2.1e-06,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini/gemini-gemma-2-27b-it": {
+        "max_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3.5e-07,
+        "output_cost_per_token": 1.05e-06,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "gemini/gemini-gemma-2-9b-it": {
+        "max_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3.5e-07,
+        "output_cost_per_token": 1.05e-06,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "command-r": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.5e-07,
+        "output_cost_per_token": 6e-07,
+        "litellm_provider": "cohere_chat",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "command-r-08-2024": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.5e-07,
+        "output_cost_per_token": 6e-07,
+        "litellm_provider": "cohere_chat",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "command-light": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-07,
+        "output_cost_per_token": 6e-07,
+        "litellm_provider": "cohere_chat",
+        "mode": "chat"
+    },
+    "command-r-plus": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 2.5e-06,
+        "output_cost_per_token": 1e-05,
+        "litellm_provider": "cohere_chat",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "command-r-plus-08-2024": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 2.5e-06,
+        "output_cost_per_token": 1e-05,
+        "litellm_provider": "cohere_chat",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "command-nightly": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "cohere",
+        "mode": "completion"
+    },
+    "command": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "cohere",
+        "mode": "completion"
+    },
+    "rerank-english-v3.0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "max_query_tokens": 2048,
+        "input_cost_per_token": 0.0,
+        "input_cost_per_query": 0.002,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "cohere",
+        "mode": "rerank"
+    },
+    "rerank-multilingual-v3.0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "max_query_tokens": 2048,
+        "input_cost_per_token": 0.0,
+        "input_cost_per_query": 0.002,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "cohere",
+        "mode": "rerank"
+    },
+    "rerank-english-v2.0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "max_query_tokens": 2048,
+        "input_cost_per_token": 0.0,
+        "input_cost_per_query": 0.002,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "cohere",
+        "mode": "rerank"
+    },
+    "rerank-multilingual-v2.0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "max_query_tokens": 2048,
+        "input_cost_per_token": 0.0,
+        "input_cost_per_query": 0.002,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "cohere",
+        "mode": "rerank"
+    },
+    "embed-english-v3.0": {
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
+        "input_cost_per_token": 1e-07,
+        "input_cost_per_image": 0.0001,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "cohere",
+        "mode": "embedding",
+        "supports_image_input": true
+    },
+    "embed-english-light-v3.0": {
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "cohere",
+        "mode": "embedding"
+    },
+    "embed-multilingual-v3.0": {
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "cohere",
+        "mode": "embedding"
+    },
+    "embed-english-v2.0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "cohere",
+        "mode": "embedding"
+    },
+    "embed-english-light-v2.0": {
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "cohere",
+        "mode": "embedding"
+    },
+    "embed-multilingual-v2.0": {
+        "max_tokens": 768,
+        "max_input_tokens": 768,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "cohere",
+        "mode": "embedding"
+    },
+    "replicate/meta/llama-2-13b": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 5e-07,
+        "litellm_provider": "replicate",
+        "mode": "chat"
+    },
+    "replicate/meta/llama-2-13b-chat": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 5e-07,
+        "litellm_provider": "replicate",
+        "mode": "chat"
+    },
+    "replicate/meta/llama-2-70b": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 6.5e-07,
+        "output_cost_per_token": 2.75e-06,
+        "litellm_provider": "replicate",
+        "mode": "chat"
+    },
+    "replicate/meta/llama-2-70b-chat": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 6.5e-07,
+        "output_cost_per_token": 2.75e-06,
+        "litellm_provider": "replicate",
+        "mode": "chat"
+    },
+    "replicate/meta/llama-2-7b": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5e-08,
+        "output_cost_per_token": 2.5e-07,
+        "litellm_provider": "replicate",
+        "mode": "chat"
+    },
+    "replicate/meta/llama-2-7b-chat": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5e-08,
+        "output_cost_per_token": 2.5e-07,
+        "litellm_provider": "replicate",
+        "mode": "chat"
+    },
+    "replicate/meta/llama-3-70b": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 6.5e-07,
+        "output_cost_per_token": 2.75e-06,
+        "litellm_provider": "replicate",
+        "mode": "chat"
+    },
+    "replicate/meta/llama-3-70b-instruct": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 6.5e-07,
+        "output_cost_per_token": 2.75e-06,
+        "litellm_provider": "replicate",
+        "mode": "chat"
+    },
+    "replicate/meta/llama-3-8b": {
+        "max_tokens": 8086,
+        "max_input_tokens": 8086,
+        "max_output_tokens": 8086,
+        "input_cost_per_token": 5e-08,
+        "output_cost_per_token": 2.5e-07,
+        "litellm_provider": "replicate",
+        "mode": "chat"
+    },
+    "replicate/meta/llama-3-8b-instruct": {
+        "max_tokens": 8086,
+        "max_input_tokens": 8086,
+        "max_output_tokens": 8086,
+        "input_cost_per_token": 5e-08,
+        "output_cost_per_token": 2.5e-07,
+        "litellm_provider": "replicate",
+        "mode": "chat"
+    },
+    "replicate/mistralai/mistral-7b-v0.1": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5e-08,
+        "output_cost_per_token": 2.5e-07,
+        "litellm_provider": "replicate",
+        "mode": "chat"
+    },
+    "replicate/mistralai/mistral-7b-instruct-v0.2": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5e-08,
+        "output_cost_per_token": 2.5e-07,
+        "litellm_provider": "replicate",
+        "mode": "chat"
+    },
+    "replicate/mistralai/mixtral-8x7b-instruct-v0.1": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-07,
+        "output_cost_per_token": 1e-06,
+        "litellm_provider": "replicate",
+        "mode": "chat"
+    },
+    "openrouter/deepseek/deepseek-coder": {
+        "max_tokens": 4096,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.4e-07,
+        "output_cost_per_token": 2.8e-07,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/microsoft/wizardlm-2-8x22b:nitro": {
+        "max_tokens": 65536,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 1e-06,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/google/gemini-pro-1.5": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 2.5e-06,
+        "output_cost_per_token": 7.5e-06,
+        "input_cost_per_image": 0.00265,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "openrouter/mistralai/mixtral-8x22b-instruct": {
+        "max_tokens": 65536,
+        "input_cost_per_token": 6.5e-07,
+        "output_cost_per_token": 6.5e-07,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/cohere/command-r-plus": {
+        "max_tokens": 128000,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/databricks/dbrx-instruct": {
+        "max_tokens": 32768,
+        "input_cost_per_token": 6e-07,
+        "output_cost_per_token": 6e-07,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/anthropic/claude-3-haiku": {
+        "max_tokens": 200000,
+        "input_cost_per_token": 2.5e-07,
+        "output_cost_per_token": 1.25e-06,
+        "input_cost_per_image": 0.0004,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "openrouter/anthropic/claude-3-haiku-20240307": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 2.5e-07,
+        "output_cost_per_token": 1.25e-06,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 264
+    },
+    "anthropic/claude-3-5-sonnet-20241022": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "cache_creation_input_token_cost": 3.75e-06,
+        "cache_read_input_token_cost": 3e-07,
+        "litellm_provider": "anthropic",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 159,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true
+    },
+    "anthropic/claude-3-5-sonnet-latest": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "cache_creation_input_token_cost": 3.75e-06,
+        "cache_read_input_token_cost": 3e-07,
+        "litellm_provider": "anthropic",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 159,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true
+    },
+    "openrouter/anthropic/claude-3.5-sonnet": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 159,
+        "supports_assistant_prefill": true
+    },
+    "openrouter/anthropic/claude-3.5-sonnet:beta": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 159
+    },
+    "openrouter/anthropic/claude-3-sonnet": {
+        "max_tokens": 200000,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "input_cost_per_image": 0.0048,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "openrouter/mistralai/mistral-large": {
+        "max_tokens": 32000,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/cognitivecomputations/dolphin-mixtral-8x7b": {
+        "max_tokens": 32769,
+        "input_cost_per_token": 5e-07,
+        "output_cost_per_token": 5e-07,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/google/gemini-pro-vision": {
+        "max_tokens": 45875,
+        "input_cost_per_token": 1.25e-07,
+        "output_cost_per_token": 3.75e-07,
+        "input_cost_per_image": 0.0025,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "openrouter/fireworks/firellava-13b": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 2e-07,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/meta-llama/llama-3-8b-instruct:free": {
+        "max_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/meta-llama/llama-3-8b-instruct:extended": {
+        "max_tokens": 16384,
+        "input_cost_per_token": 2.25e-07,
+        "output_cost_per_token": 2.25e-06,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/meta-llama/llama-3-70b-instruct:nitro": {
+        "max_tokens": 8192,
+        "input_cost_per_token": 9e-07,
+        "output_cost_per_token": 9e-07,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/meta-llama/llama-3-70b-instruct": {
+        "max_tokens": 8192,
+        "input_cost_per_token": 5.9e-07,
+        "output_cost_per_token": 7.9e-07,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/openai/o1-mini": {
+        "max_tokens": 65536,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 65536,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.2e-05,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "openrouter/openai/o1-mini-2024-09-12": {
+        "max_tokens": 65536,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 65536,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.2e-05,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "openrouter/openai/o1-preview": {
+        "max_tokens": 32768,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 1.5e-05,
+        "output_cost_per_token": 6e-05,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "openrouter/openai/o1-preview-2024-09-12": {
+        "max_tokens": 32768,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 1.5e-05,
+        "output_cost_per_token": 6e-05,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "openrouter/openai/gpt-4o": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "openrouter/openai/gpt-4o-2024-05-13": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "openrouter/openai/gpt-4-vision-preview": {
+        "max_tokens": 130000,
+        "input_cost_per_token": 1e-05,
+        "output_cost_per_token": 3e-05,
+        "input_cost_per_image": 0.01445,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "openrouter/openai/gpt-3.5-turbo": {
+        "max_tokens": 4095,
+        "input_cost_per_token": 1.5e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/openai/gpt-3.5-turbo-16k": {
+        "max_tokens": 16383,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 4e-06,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/openai/gpt-4": {
+        "max_tokens": 8192,
+        "input_cost_per_token": 3e-05,
+        "output_cost_per_token": 6e-05,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/anthropic/claude-instant-v1": {
+        "max_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 1.63e-06,
+        "output_cost_per_token": 5.51e-06,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/anthropic/claude-2": {
+        "max_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 1.102e-05,
+        "output_cost_per_token": 3.268e-05,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/anthropic/claude-3-opus": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.5e-05,
+        "output_cost_per_token": 7.5e-05,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 395
+    },
+    "openrouter/google/palm-2-chat-bison": {
+        "max_tokens": 25804,
+        "input_cost_per_token": 5e-07,
+        "output_cost_per_token": 5e-07,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/google/palm-2-codechat-bison": {
+        "max_tokens": 20070,
+        "input_cost_per_token": 5e-07,
+        "output_cost_per_token": 5e-07,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/meta-llama/llama-2-13b-chat": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 2e-07,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/meta-llama/llama-2-70b-chat": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 1.5e-06,
+        "output_cost_per_token": 1.5e-06,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/meta-llama/codellama-34b-instruct": {
+        "max_tokens": 8192,
+        "input_cost_per_token": 5e-07,
+        "output_cost_per_token": 5e-07,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/nousresearch/nous-hermes-llama2-13b": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 2e-07,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/mancer/weaver": {
+        "max_tokens": 8000,
+        "input_cost_per_token": 5.625e-06,
+        "output_cost_per_token": 5.625e-06,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/gryphe/mythomax-l2-13b": {
+        "max_tokens": 8192,
+        "input_cost_per_token": 1.875e-06,
+        "output_cost_per_token": 1.875e-06,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/jondurbin/airoboros-l2-70b-2.1": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 1.3875e-05,
+        "output_cost_per_token": 1.3875e-05,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/undi95/remm-slerp-l2-13b": {
+        "max_tokens": 6144,
+        "input_cost_per_token": 1.875e-06,
+        "output_cost_per_token": 1.875e-06,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/pygmalionai/mythalion-13b": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 1.875e-06,
+        "output_cost_per_token": 1.875e-06,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/mistralai/mistral-7b-instruct": {
+        "max_tokens": 8192,
+        "input_cost_per_token": 1.3e-07,
+        "output_cost_per_token": 1.3e-07,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "openrouter/mistralai/mistral-7b-instruct:free": {
+        "max_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
+    "j2-ultra": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 1.5e-05,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "ai21",
+        "mode": "completion"
+    },
+    "jamba-1.5-mini@001": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 4e-07,
+        "litellm_provider": "ai21",
+        "mode": "chat"
+    },
+    "jamba-1.5-large@001": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 2e-06,
+        "output_cost_per_token": 8e-06,
+        "litellm_provider": "ai21",
+        "mode": "chat"
+    },
+    "jamba-1.5": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 4e-07,
+        "litellm_provider": "ai21",
+        "mode": "chat"
+    },
+    "jamba-1.5-mini": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 4e-07,
+        "litellm_provider": "ai21",
+        "mode": "chat"
+    },
+    "jamba-1.5-large": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 2e-06,
+        "output_cost_per_token": 8e-06,
+        "litellm_provider": "ai21",
+        "mode": "chat"
+    },
+    "j2-mid": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 1e-05,
+        "output_cost_per_token": 1e-05,
+        "litellm_provider": "ai21",
+        "mode": "completion"
+    },
+    "j2-light": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 3e-06,
+        "litellm_provider": "ai21",
+        "mode": "completion"
+    },
+    "dolphin": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 5e-07,
+        "output_cost_per_token": 5e-07,
+        "litellm_provider": "nlp_cloud",
+        "mode": "completion"
+    },
+    "chatdolphin": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 5e-07,
+        "output_cost_per_token": 5e-07,
+        "litellm_provider": "nlp_cloud",
+        "mode": "chat"
+    },
+    "luminous-base": {
+        "max_tokens": 2048,
+        "input_cost_per_token": 3e-05,
+        "output_cost_per_token": 3.3e-05,
+        "litellm_provider": "aleph_alpha",
+        "mode": "completion"
+    },
+    "luminous-base-control": {
+        "max_tokens": 2048,
+        "input_cost_per_token": 3.75e-05,
+        "output_cost_per_token": 4.125e-05,
+        "litellm_provider": "aleph_alpha",
+        "mode": "chat"
+    },
+    "luminous-extended": {
+        "max_tokens": 2048,
+        "input_cost_per_token": 4.5e-05,
+        "output_cost_per_token": 4.95e-05,
+        "litellm_provider": "aleph_alpha",
+        "mode": "completion"
+    },
+    "luminous-extended-control": {
+        "max_tokens": 2048,
+        "input_cost_per_token": 5.625e-05,
+        "output_cost_per_token": 6.1875e-05,
+        "litellm_provider": "aleph_alpha",
+        "mode": "chat"
+    },
+    "luminous-supreme": {
+        "max_tokens": 2048,
+        "input_cost_per_token": 0.000175,
+        "output_cost_per_token": 0.0001925,
+        "litellm_provider": "aleph_alpha",
+        "mode": "completion"
+    },
+    "luminous-supreme-control": {
+        "max_tokens": 2048,
+        "input_cost_per_token": 0.00021875,
+        "output_cost_per_token": 0.000240625,
+        "litellm_provider": "aleph_alpha",
+        "mode": "chat"
+    },
+    "ai21.j2-mid-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 8191,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 1.25e-05,
+        "output_cost_per_token": 1.25e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "ai21.j2-ultra-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 8191,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 1.88e-05,
+        "output_cost_per_token": 1.88e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "ai21.jamba-instruct-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 70000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5e-07,
+        "output_cost_per_token": 7e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_system_messages": true
+    },
+    "amazon.titan-text-lite-v1": {
+        "max_tokens": 4000,
+        "max_input_tokens": 42000,
+        "max_output_tokens": 4000,
+        "input_cost_per_token": 3e-07,
+        "output_cost_per_token": 4e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "amazon.titan-text-express-v1": {
+        "max_tokens": 8000,
+        "max_input_tokens": 42000,
+        "max_output_tokens": 8000,
+        "input_cost_per_token": 1.3e-06,
+        "output_cost_per_token": 1.7e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "amazon.titan-text-premier-v1:0": {
+        "max_tokens": 32000,
+        "max_input_tokens": 42000,
+        "max_output_tokens": 32000,
+        "input_cost_per_token": 5e-07,
+        "output_cost_per_token": 1.5e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "amazon.titan-embed-text-v1": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "output_vector_size": 1536,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "bedrock",
+        "mode": "embedding"
+    },
+    "amazon.titan-embed-text-v2:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "output_vector_size": 1024,
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "bedrock",
+        "mode": "embedding"
+    },
+    "mistral.mistral-7b-instruct-v0:2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 1.5e-07,
+        "output_cost_per_token": 2e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "mistral.mixtral-8x7b-instruct-v0:1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 4.5e-07,
+        "output_cost_per_token": 7e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "mistral.mistral-large-2402-v1:0": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "mistral.mistral-large-2407-v1:0": {
+        "max_tokens": 8191,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 9e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "mistral.mistral-small-2402-v1:0": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 3e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "bedrock/us-west-2/mistral.mixtral-8x7b-instruct-v0:1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 4.5e-07,
+        "output_cost_per_token": 7e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-east-1/mistral.mixtral-8x7b-instruct-v0:1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 4.5e-07,
+        "output_cost_per_token": 7e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-west-3/mistral.mixtral-8x7b-instruct-v0:1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 5.9e-07,
+        "output_cost_per_token": 9.1e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-west-2/mistral.mistral-7b-instruct-v0:2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 1.5e-07,
+        "output_cost_per_token": 2e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-east-1/mistral.mistral-7b-instruct-v0:2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 1.5e-07,
+        "output_cost_per_token": 2e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-west-3/mistral.mistral-7b-instruct-v0:2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 2.6e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-east-1/mistral.mistral-large-2402-v1:0": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-west-2/mistral.mistral-large-2402-v1:0": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "bedrock/eu-west-3/mistral.mistral-large-2402-v1:0": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 1.04e-05,
+        "output_cost_per_token": 3.12e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "anthropic.claude-3-sonnet-20240229-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "anthropic.claude-3-5-sonnet-20240620-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "anthropic.claude-3-5-sonnet-20241022-v2:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true
+    },
+    "anthropic.claude-3-5-sonnet-latest-v2:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "anthropic.claude-3-haiku-20240307-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 2.5e-07,
+        "output_cost_per_token": 1.25e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "anthropic.claude-3-opus-20240229-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.5e-05,
+        "output_cost_per_token": 7.5e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "us.anthropic.claude-3-sonnet-20240229-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "us.anthropic.claude-3-5-sonnet-20240620-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "us.anthropic.claude-3-5-sonnet-20241022-v2:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true
+    },
+    "us.anthropic.claude-3-haiku-20240307-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 2.5e-07,
+        "output_cost_per_token": 1.25e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "us.anthropic.claude-3-opus-20240229-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.5e-05,
+        "output_cost_per_token": 7.5e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "eu.anthropic.claude-3-sonnet-20240229-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true
+    },
+    "eu.anthropic.claude-3-haiku-20240307-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 2.5e-07,
+        "output_cost_per_token": 1.25e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "eu.anthropic.claude-3-opus-20240229-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.5e-05,
+        "output_cost_per_token": 7.5e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "anthropic.claude-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-east-1/anthropic.claude-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-west-2/anthropic.claude-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/ap-northeast-1/anthropic.claude-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.0455,
+        "output_cost_per_second": 0.0455,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.02527,
+        "output_cost_per_second": 0.02527,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-central-1/anthropic.claude-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.0415,
+        "output_cost_per_second": 0.0415,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.02305,
+        "output_cost_per_second": 0.02305,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-east-1/1-month-commitment/anthropic.claude-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.0175,
+        "output_cost_per_second": 0.0175,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-east-1/6-month-commitment/anthropic.claude-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.00972,
+        "output_cost_per_second": 0.00972,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-west-2/1-month-commitment/anthropic.claude-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.0175,
+        "output_cost_per_second": 0.0175,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-west-2/6-month-commitment/anthropic.claude-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.00972,
+        "output_cost_per_second": 0.00972,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "anthropic.claude-v2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-east-1/anthropic.claude-v2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-west-2/anthropic.claude-v2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/ap-northeast-1/anthropic.claude-v2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.0455,
+        "output_cost_per_second": 0.0455,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.02527,
+        "output_cost_per_second": 0.02527,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-central-1/anthropic.claude-v2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.0415,
+        "output_cost_per_second": 0.0415,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.02305,
+        "output_cost_per_second": 0.02305,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-east-1/1-month-commitment/anthropic.claude-v2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.0175,
+        "output_cost_per_second": 0.0175,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-east-1/6-month-commitment/anthropic.claude-v2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.00972,
+        "output_cost_per_second": 0.00972,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-west-2/1-month-commitment/anthropic.claude-v2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.0175,
+        "output_cost_per_second": 0.0175,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-west-2/6-month-commitment/anthropic.claude-v2": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.00972,
+        "output_cost_per_second": 0.00972,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "anthropic.claude-v2:1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-east-1/anthropic.claude-v2:1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-west-2/anthropic.claude-v2:1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/ap-northeast-1/anthropic.claude-v2:1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2:1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.0455,
+        "output_cost_per_second": 0.0455,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2:1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.02527,
+        "output_cost_per_second": 0.02527,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-central-1/anthropic.claude-v2:1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-06,
+        "output_cost_per_token": 2.4e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2:1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.0415,
+        "output_cost_per_second": 0.0415,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2:1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.02305,
+        "output_cost_per_second": 0.02305,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-east-1/1-month-commitment/anthropic.claude-v2:1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.0175,
+        "output_cost_per_second": 0.0175,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-east-1/6-month-commitment/anthropic.claude-v2:1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.00972,
+        "output_cost_per_second": 0.00972,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-west-2/1-month-commitment/anthropic.claude-v2:1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.0175,
+        "output_cost_per_second": 0.0175,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-west-2/6-month-commitment/anthropic.claude-v2:1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.00972,
+        "output_cost_per_second": 0.00972,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "anthropic.claude-instant-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 1.63e-06,
+        "output_cost_per_token": 5.51e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-east-1/anthropic.claude-instant-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-07,
+        "output_cost_per_token": 2.4e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-east-1/1-month-commitment/anthropic.claude-instant-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.011,
+        "output_cost_per_second": 0.011,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-east-1/6-month-commitment/anthropic.claude-instant-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.00611,
+        "output_cost_per_second": 0.00611,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-west-2/1-month-commitment/anthropic.claude-instant-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.011,
+        "output_cost_per_second": 0.011,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-west-2/6-month-commitment/anthropic.claude-instant-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.00611,
+        "output_cost_per_second": 0.00611,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-west-2/anthropic.claude-instant-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 8e-07,
+        "output_cost_per_token": 2.4e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/ap-northeast-1/anthropic.claude-instant-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 2.23e-06,
+        "output_cost_per_token": 7.55e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-instant-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.01475,
+        "output_cost_per_second": 0.01475,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-instant-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.008194,
+        "output_cost_per_second": 0.008194,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-central-1/anthropic.claude-instant-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 2.48e-06,
+        "output_cost_per_token": 8.38e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-central-1/1-month-commitment/anthropic.claude-instant-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.01635,
+        "output_cost_per_second": 0.01635,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-central-1/6-month-commitment/anthropic.claude-instant-v1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 100000,
+        "max_output_tokens": 8191,
+        "input_cost_per_second": 0.009083,
+        "output_cost_per_second": 0.009083,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "cohere.command-text-v14": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.5e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/*/1-month-commitment/cohere.command-text-v14": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_second": 0.011,
+        "output_cost_per_second": 0.011,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/*/6-month-commitment/cohere.command-text-v14": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_second": 0.0066027,
+        "output_cost_per_second": 0.0066027,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "cohere.command-light-text-v14": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-07,
+        "output_cost_per_token": 6e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/*/1-month-commitment/cohere.command-light-text-v14": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_second": 0.001902,
+        "output_cost_per_second": 0.001902,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/*/6-month-commitment/cohere.command-light-text-v14": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_second": 0.0011416,
+        "output_cost_per_second": 0.0011416,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "cohere.command-r-plus-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 1.5e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "cohere.command-r-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5e-07,
+        "output_cost_per_token": 1.5e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "cohere.embed-english-v3": {
+        "max_tokens": 512,
+        "max_input_tokens": 512,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "bedrock",
+        "mode": "embedding"
+    },
+    "cohere.embed-multilingual-v3": {
+        "max_tokens": 512,
+        "max_input_tokens": 512,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "bedrock",
+        "mode": "embedding"
+    },
+    "meta.llama2-13b-chat-v1": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 7.5e-07,
+        "output_cost_per_token": 1e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "meta.llama2-70b-chat-v1": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.95e-06,
+        "output_cost_per_token": 2.56e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "meta.llama3-8b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3e-07,
+        "output_cost_per_token": 6e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-east-1/meta.llama3-8b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3e-07,
+        "output_cost_per_token": 6e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-west-1/meta.llama3-8b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3e-07,
+        "output_cost_per_token": 6e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/ap-south-1/meta.llama3-8b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3.6e-07,
+        "output_cost_per_token": 7.2e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/ca-central-1/meta.llama3-8b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3.5e-07,
+        "output_cost_per_token": 6.9e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-west-1/meta.llama3-8b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3.2e-07,
+        "output_cost_per_token": 6.5e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-west-2/meta.llama3-8b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3.9e-07,
+        "output_cost_per_token": 7.8e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/sa-east-1/meta.llama3-8b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 5e-07,
+        "output_cost_per_token": 1.01e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "meta.llama3-70b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 2.65e-06,
+        "output_cost_per_token": 3.5e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-east-1/meta.llama3-70b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 2.65e-06,
+        "output_cost_per_token": 3.5e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/us-west-1/meta.llama3-70b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 2.65e-06,
+        "output_cost_per_token": 3.5e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/ap-south-1/meta.llama3-70b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3.18e-06,
+        "output_cost_per_token": 4.2e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/ca-central-1/meta.llama3-70b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3.05e-06,
+        "output_cost_per_token": 4.03e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-west-1/meta.llama3-70b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 2.86e-06,
+        "output_cost_per_token": 3.78e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-west-2/meta.llama3-70b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 3.45e-06,
+        "output_cost_per_token": 4.55e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/sa-east-1/meta.llama3-70b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 4.45e-06,
+        "output_cost_per_token": 5.88e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "meta.llama3-1-8b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 2048,
+        "input_cost_per_token": 2.2e-07,
+        "output_cost_per_token": 2.2e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "meta.llama3-1-70b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 2048,
+        "input_cost_per_token": 9.9e-07,
+        "output_cost_per_token": 9.9e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "meta.llama3-1-405b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5.32e-06,
+        "output_cost_per_token": 1.6e-05,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "meta.llama3-2-1b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 1e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "us.meta.llama3-2-1b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 1e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "eu.meta.llama3-2-1b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.3e-07,
+        "output_cost_per_token": 1.3e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "meta.llama3-2-3b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.5e-07,
+        "output_cost_per_token": 1.5e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "us.meta.llama3-2-3b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.5e-07,
+        "output_cost_per_token": 1.5e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "eu.meta.llama3-2-3b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.9e-07,
+        "output_cost_per_token": 1.9e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "meta.llama3-2-11b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3.5e-07,
+        "output_cost_per_token": 3.5e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "us.meta.llama3-2-11b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 3.5e-07,
+        "output_cost_per_token": 3.5e-07,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "meta.llama3-2-90b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 2e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "us.meta.llama3-2-90b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 2e-06,
+        "output_cost_per_token": 2e-06,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "512-x-512/50-steps/stability.stable-diffusion-xl-v0": {
+        "max_tokens": 77,
+        "max_input_tokens": 77,
+        "output_cost_per_image": 0.018,
+        "litellm_provider": "bedrock",
+        "mode": "image_generation"
+    },
+    "512-x-512/max-steps/stability.stable-diffusion-xl-v0": {
+        "max_tokens": 77,
+        "max_input_tokens": 77,
+        "output_cost_per_image": 0.036,
+        "litellm_provider": "bedrock",
+        "mode": "image_generation"
+    },
+    "max-x-max/50-steps/stability.stable-diffusion-xl-v0": {
+        "max_tokens": 77,
+        "max_input_tokens": 77,
+        "output_cost_per_image": 0.036,
+        "litellm_provider": "bedrock",
+        "mode": "image_generation"
+    },
+    "max-x-max/max-steps/stability.stable-diffusion-xl-v0": {
+        "max_tokens": 77,
+        "max_input_tokens": 77,
+        "output_cost_per_image": 0.072,
+        "litellm_provider": "bedrock",
+        "mode": "image_generation"
+    },
+    "1024-x-1024/50-steps/stability.stable-diffusion-xl-v1": {
+        "max_tokens": 77,
+        "max_input_tokens": 77,
+        "output_cost_per_image": 0.04,
+        "litellm_provider": "bedrock",
+        "mode": "image_generation"
+    },
+    "1024-x-1024/max-steps/stability.stable-diffusion-xl-v1": {
+        "max_tokens": 77,
+        "max_input_tokens": 77,
+        "output_cost_per_image": 0.08,
+        "litellm_provider": "bedrock",
+        "mode": "image_generation"
+    },
+    "sagemaker/meta-textgeneration-llama-2-7b": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "sagemaker",
+        "mode": "completion"
+    },
+    "sagemaker/meta-textgeneration-llama-2-7b-f": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "sagemaker",
+        "mode": "chat"
+    },
+    "sagemaker/meta-textgeneration-llama-2-13b": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "sagemaker",
+        "mode": "completion"
+    },
+    "sagemaker/meta-textgeneration-llama-2-13b-f": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "sagemaker",
+        "mode": "chat"
+    },
+    "sagemaker/meta-textgeneration-llama-2-70b": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "sagemaker",
+        "mode": "completion"
+    },
+    "sagemaker/meta-textgeneration-llama-2-70b-b-f": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "sagemaker",
+        "mode": "chat"
+    },
+    "together-ai-up-to-4b": {
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 1e-07,
+        "litellm_provider": "together_ai",
+        "mode": "chat"
+    },
+    "together-ai-4.1b-8b": {
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 2e-07,
+        "litellm_provider": "together_ai",
+        "mode": "chat"
+    },
+    "together-ai-8.1b-21b": {
+        "max_tokens": 1000,
+        "input_cost_per_token": 3e-07,
+        "output_cost_per_token": 3e-07,
+        "litellm_provider": "together_ai",
+        "mode": "chat"
+    },
+    "together-ai-21.1b-41b": {
+        "input_cost_per_token": 8e-07,
+        "output_cost_per_token": 8e-07,
+        "litellm_provider": "together_ai",
+        "mode": "chat"
+    },
+    "together-ai-41.1b-80b": {
+        "input_cost_per_token": 9e-07,
+        "output_cost_per_token": 9e-07,
+        "litellm_provider": "together_ai",
+        "mode": "chat"
+    },
+    "together-ai-81.1b-110b": {
+        "input_cost_per_token": 1.8e-06,
+        "output_cost_per_token": 1.8e-06,
+        "litellm_provider": "together_ai",
+        "mode": "chat"
+    },
+    "together-ai-embedding-up-to-150m": {
+        "input_cost_per_token": 8e-09,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "together_ai",
+        "mode": "embedding"
+    },
+    "together-ai-embedding-151m-to-350m": {
+        "input_cost_per_token": 1.6e-08,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "together_ai",
+        "mode": "embedding"
+    },
+    "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": {
+        "input_cost_per_token": 6e-07,
+        "output_cost_per_token": 6e-07,
+        "litellm_provider": "together_ai",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "mode": "chat"
+    },
+    "together_ai/mistralai/Mistral-7B-Instruct-v0.1": {
+        "litellm_provider": "together_ai",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "mode": "chat"
+    },
+    "together_ai/togethercomputer/CodeLlama-34b-Instruct": {
+        "litellm_provider": "together_ai",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "mode": "chat"
+    },
+    "ollama/codegemma": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "completion"
+    },
+    "ollama/codegeex4": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat",
+        "supports_function_calling": false
+    },
+    "ollama/deepseek-coder-v2-instruct": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "ollama/deepseek-coder-v2-base": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "completion",
+        "supports_function_calling": true
+    },
+    "ollama/deepseek-coder-v2-lite-instruct": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "ollama/deepseek-coder-v2-lite-base": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "completion",
+        "supports_function_calling": true
+    },
+    "ollama/internlm2_5-20b-chat": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "ollama/llama2": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/llama2:7b": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/llama2:13b": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/llama2:70b": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/llama2-uncensored": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "completion"
+    },
+    "ollama/llama3": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/llama3:8b": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/llama3:70b": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/llama3.1": {
+        "max_tokens": 32768,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "ollama/mistral-large-instruct-2407": {
+        "max_tokens": 65536,
+        "max_input_tokens": 65536,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/mistral": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "completion"
+    },
+    "ollama/mistral-7B-Instruct-v0.1": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/mistral-7B-Instruct-v0.2": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/mixtral-8x7B-Instruct-v0.1": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/mixtral-8x22B-Instruct-v0.1": {
+        "max_tokens": 65536,
+        "max_input_tokens": 65536,
+        "max_output_tokens": 65536,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/codellama": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "completion"
+    },
+    "ollama/orca-mini": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "completion"
+    },
+    "ollama/vicuna": {
+        "max_tokens": 2048,
+        "max_input_tokens": 2048,
+        "max_output_tokens": 2048,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "completion"
+    },
+    "deepinfra/lizpreciatior/lzlv_70b_fp16_hf": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 7e-07,
+        "output_cost_per_token": 9e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "chat"
+    },
+    "deepinfra/Gryphe/MythoMax-L2-13b": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 2.2e-07,
+        "output_cost_per_token": 2.2e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "chat"
+    },
+    "deepinfra/mistralai/Mistral-7B-Instruct-v0.1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 1.3e-07,
+        "output_cost_per_token": 1.3e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "chat"
+    },
+    "deepinfra/meta-llama/Llama-2-70b-chat-hf": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 7e-07,
+        "output_cost_per_token": 9e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "chat"
+    },
+    "deepinfra/cognitivecomputations/dolphin-2.6-mixtral-8x7b": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 2.7e-07,
+        "output_cost_per_token": 2.7e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "chat"
+    },
+    "deepinfra/codellama/CodeLlama-34b-Instruct-hf": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 6e-07,
+        "output_cost_per_token": 6e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "chat"
+    },
+    "deepinfra/deepinfra/mixtral": {
+        "max_tokens": 4096,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 2.7e-07,
+        "output_cost_per_token": 2.7e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "completion"
+    },
+    "deepinfra/Phind/Phind-CodeLlama-34B-v2": {
+        "max_tokens": 4096,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 6e-07,
+        "output_cost_per_token": 6e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "chat"
+    },
+    "deepinfra/mistralai/Mixtral-8x7B-Instruct-v0.1": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 2.7e-07,
+        "output_cost_per_token": 2.7e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "chat"
+    },
+    "deepinfra/deepinfra/airoboros-70b": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 7e-07,
+        "output_cost_per_token": 9e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "chat"
+    },
+    "deepinfra/01-ai/Yi-34B-Chat": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 6e-07,
+        "output_cost_per_token": 6e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "chat"
+    },
+    "deepinfra/01-ai/Yi-6B-200K": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.3e-07,
+        "output_cost_per_token": 1.3e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "completion"
+    },
+    "deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 7e-07,
+        "output_cost_per_token": 9e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "chat"
+    },
+    "deepinfra/meta-llama/Llama-2-13b-chat-hf": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 2.2e-07,
+        "output_cost_per_token": 2.2e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "chat"
+    },
+    "deepinfra/amazon/MistralLite": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 2e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "chat"
+    },
+    "deepinfra/meta-llama/Llama-2-7b-chat-hf": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.3e-07,
+        "output_cost_per_token": 1.3e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "chat"
+    },
+    "deepinfra/meta-llama/Meta-Llama-3-8B-Instruct": {
+        "max_tokens": 8191,
+        "max_input_tokens": 8191,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 8e-08,
+        "output_cost_per_token": 8e-08,
+        "litellm_provider": "deepinfra",
+        "mode": "chat"
+    },
+    "deepinfra/meta-llama/Meta-Llama-3-70B-Instruct": {
+        "max_tokens": 8191,
+        "max_input_tokens": 8191,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5.9e-07,
+        "output_cost_per_token": 7.9e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "chat"
+    },
+    "deepinfra/01-ai/Yi-34B-200K": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 6e-07,
+        "output_cost_per_token": 6e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "completion"
+    },
+    "deepinfra/openchat/openchat_3.5": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.3e-07,
+        "output_cost_per_token": 1.3e-07,
+        "litellm_provider": "deepinfra",
+        "mode": "chat"
+    },
+    "perplexity/codellama-34b-instruct": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 3.5e-07,
+        "output_cost_per_token": 1.4e-06,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/codellama-70b-instruct": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 7e-07,
+        "output_cost_per_token": 2.8e-06,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/llama-3.1-70b-instruct": {
+        "max_tokens": 131072,
+        "max_input_tokens": 131072,
+        "max_output_tokens": 131072,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 1e-06,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/llama-3.1-8b-instruct": {
+        "max_tokens": 131072,
+        "max_input_tokens": 131072,
+        "max_output_tokens": 131072,
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 2e-07,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/llama-3.1-sonar-huge-128k-online": {
+        "max_tokens": 127072,
+        "max_input_tokens": 127072,
+        "max_output_tokens": 127072,
+        "input_cost_per_token": 5e-06,
+        "output_cost_per_token": 5e-06,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/llama-3.1-sonar-large-128k-online": {
+        "max_tokens": 127072,
+        "max_input_tokens": 127072,
+        "max_output_tokens": 127072,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 1e-06,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/llama-3.1-sonar-large-128k-chat": {
+        "max_tokens": 131072,
+        "max_input_tokens": 131072,
+        "max_output_tokens": 131072,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 1e-06,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/llama-3.1-sonar-small-128k-chat": {
+        "max_tokens": 131072,
+        "max_input_tokens": 131072,
+        "max_output_tokens": 131072,
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 2e-07,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/llama-3.1-sonar-small-128k-online": {
+        "max_tokens": 127072,
+        "max_input_tokens": 127072,
+        "max_output_tokens": 127072,
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 2e-07,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/pplx-7b-chat": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 7e-08,
+        "output_cost_per_token": 2.8e-07,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/pplx-70b-chat": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 7e-07,
+        "output_cost_per_token": 2.8e-06,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/pplx-7b-online": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 2.8e-07,
+        "input_cost_per_request": 0.005,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/pplx-70b-online": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 2.8e-06,
+        "input_cost_per_request": 0.005,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/llama-2-70b-chat": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 7e-07,
+        "output_cost_per_token": 2.8e-06,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/mistral-7b-instruct": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 7e-08,
+        "output_cost_per_token": 2.8e-07,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/mixtral-8x7b-instruct": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 7e-08,
+        "output_cost_per_token": 2.8e-07,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/sonar-small-chat": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 7e-08,
+        "output_cost_per_token": 2.8e-07,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/sonar-small-online": {
+        "max_tokens": 12000,
+        "max_input_tokens": 12000,
+        "max_output_tokens": 12000,
+        "input_cost_per_token": 0,
+        "output_cost_per_token": 2.8e-07,
+        "input_cost_per_request": 0.005,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/sonar-medium-chat": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 6e-07,
+        "output_cost_per_token": 1.8e-06,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "perplexity/sonar-medium-online": {
+        "max_tokens": 12000,
+        "max_input_tokens": 12000,
+        "max_output_tokens": 12000,
+        "input_cost_per_token": 0,
+        "output_cost_per_token": 1.8e-06,
+        "input_cost_per_request": 0.005,
+        "litellm_provider": "perplexity",
+        "mode": "chat"
+    },
+    "fireworks_ai/accounts/fireworks/models/llama-v3p2-1b-instruct": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 1e-07,
+        "litellm_provider": "fireworks_ai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "https://fireworks.ai/pricing"
+    },
+    "fireworks_ai/accounts/fireworks/models/llama-v3p2-3b-instruct": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 1e-07,
+        "litellm_provider": "fireworks_ai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "https://fireworks.ai/pricing"
+    },
+    "fireworks_ai/accounts/fireworks/models/llama-v3p2-11b-vision-instruct": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 2e-07,
+        "litellm_provider": "fireworks_ai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://fireworks.ai/pricing"
+    },
+    "accounts/fireworks/models/llama-v3p2-90b-vision-instruct": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 9e-07,
+        "output_cost_per_token": 9e-07,
+        "litellm_provider": "fireworks_ai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://fireworks.ai/pricing"
+    },
+    "fireworks_ai/accounts/fireworks/models/firefunction-v2": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 9e-07,
+        "output_cost_per_token": 9e-07,
+        "litellm_provider": "fireworks_ai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "https://fireworks.ai/pricing"
+    },
+    "fireworks_ai/accounts/fireworks/models/mixtral-8x22b-instruct-hf": {
+        "max_tokens": 65536,
+        "max_input_tokens": 65536,
+        "max_output_tokens": 65536,
+        "input_cost_per_token": 1.2e-06,
+        "output_cost_per_token": 1.2e-06,
+        "litellm_provider": "fireworks_ai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "https://fireworks.ai/pricing"
+    },
+    "fireworks_ai/accounts/fireworks/models/qwen2-72b-instruct": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 9e-07,
+        "output_cost_per_token": 9e-07,
+        "litellm_provider": "fireworks_ai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "https://fireworks.ai/pricing"
+    },
+    "fireworks_ai/accounts/fireworks/models/yi-large": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 3e-06,
+        "output_cost_per_token": 3e-06,
+        "litellm_provider": "fireworks_ai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "https://fireworks.ai/pricing"
+    },
+    "fireworks_ai/accounts/fireworks/models/deepseek-coder-v2-instruct": {
+        "max_tokens": 65536,
+        "max_input_tokens": 65536,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 1.2e-06,
+        "output_cost_per_token": 1.2e-06,
+        "litellm_provider": "fireworks_ai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "https://fireworks.ai/pricing"
+    },
+    "fireworks_ai/nomic-ai/nomic-embed-text-v1.5": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "input_cost_per_token": 8e-09,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "fireworks_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://fireworks.ai/pricing"
+    },
+    "fireworks_ai/nomic-ai/nomic-embed-text-v1": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "input_cost_per_token": 8e-09,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "fireworks_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://fireworks.ai/pricing"
+    },
+    "fireworks_ai/WhereIsAI/UAE-Large-V1": {
+        "max_tokens": 512,
+        "max_input_tokens": 512,
+        "input_cost_per_token": 1.6e-08,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "fireworks_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://fireworks.ai/pricing"
+    },
+    "fireworks_ai/thenlper/gte-large": {
+        "max_tokens": 512,
+        "max_input_tokens": 512,
+        "input_cost_per_token": 1.6e-08,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "fireworks_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://fireworks.ai/pricing"
+    },
+    "fireworks_ai/thenlper/gte-base": {
+        "max_tokens": 512,
+        "max_input_tokens": 512,
+        "input_cost_per_token": 8e-09,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "fireworks_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://fireworks.ai/pricing"
+    },
+    "fireworks-ai-up-to-16b": {
+        "input_cost_per_token": 2e-07,
+        "output_cost_per_token": 2e-07,
+        "litellm_provider": "fireworks_ai"
+    },
+    "fireworks-ai-16.1b-to-80b": {
+        "input_cost_per_token": 9e-07,
+        "output_cost_per_token": 9e-07,
+        "litellm_provider": "fireworks_ai"
+    },
+    "fireworks-ai-moe-up-to-56b": {
+        "input_cost_per_token": 5e-07,
+        "output_cost_per_token": 5e-07,
+        "litellm_provider": "fireworks_ai"
+    },
+    "fireworks-ai-56b-to-176b": {
+        "input_cost_per_token": 1.2e-06,
+        "output_cost_per_token": 1.2e-06,
+        "litellm_provider": "fireworks_ai"
+    },
+    "fireworks-ai-default": {
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "fireworks_ai"
+    },
+    "fireworks-ai-embedding-up-to-150m": {
+        "input_cost_per_token": 8e-09,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "fireworks_ai-embedding-models"
+    },
+    "fireworks-ai-embedding-150m-to-350m": {
+        "input_cost_per_token": 1.6e-08,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "fireworks_ai-embedding-models"
+    },
+    "anyscale/mistralai/Mistral-7B-Instruct-v0.1": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 1.5e-07,
+        "output_cost_per_token": 1.5e-07,
+        "litellm_provider": "anyscale",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/mistralai-Mistral-7B-Instruct-v0.1"
+    },
+    "anyscale/mistralai/Mixtral-8x7B-Instruct-v0.1": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 1.5e-07,
+        "output_cost_per_token": 1.5e-07,
+        "litellm_provider": "anyscale",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/mistralai-Mixtral-8x7B-Instruct-v0.1"
+    },
+    "anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1": {
+        "max_tokens": 65536,
+        "max_input_tokens": 65536,
+        "max_output_tokens": 65536,
+        "input_cost_per_token": 9e-07,
+        "output_cost_per_token": 9e-07,
+        "litellm_provider": "anyscale",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/mistralai-Mixtral-8x22B-Instruct-v0.1"
+    },
+    "anyscale/HuggingFaceH4/zephyr-7b-beta": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 1.5e-07,
+        "output_cost_per_token": 1.5e-07,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/google/gemma-7b-it": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 1.5e-07,
+        "output_cost_per_token": 1.5e-07,
+        "litellm_provider": "anyscale",
+        "mode": "chat",
+        "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/google-gemma-7b-it"
+    },
+    "anyscale/meta-llama/Llama-2-7b-chat-hf": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.5e-07,
+        "output_cost_per_token": 1.5e-07,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/meta-llama/Llama-2-13b-chat-hf": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 2.5e-07,
+        "output_cost_per_token": 2.5e-07,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/meta-llama/Llama-2-70b-chat-hf": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 1e-06,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/codellama/CodeLlama-34b-Instruct-hf": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 1e-06,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/codellama/CodeLlama-70b-Instruct-hf": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 1e-06,
+        "litellm_provider": "anyscale",
+        "mode": "chat",
+        "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/codellama-CodeLlama-70b-Instruct-hf"
+    },
+    "anyscale/meta-llama/Meta-Llama-3-8B-Instruct": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 1.5e-07,
+        "output_cost_per_token": 1.5e-07,
+        "litellm_provider": "anyscale",
+        "mode": "chat",
+        "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-8B-Instruct"
+    },
+    "anyscale/meta-llama/Meta-Llama-3-70B-Instruct": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 1e-06,
+        "litellm_provider": "anyscale",
+        "mode": "chat",
+        "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-70B-Instruct"
+    },
+    "cloudflare/@cf/meta/llama-2-7b-chat-fp16": {
+        "max_tokens": 3072,
+        "max_input_tokens": 3072,
+        "max_output_tokens": 3072,
+        "input_cost_per_token": 1.923e-06,
+        "output_cost_per_token": 1.923e-06,
+        "litellm_provider": "cloudflare",
+        "mode": "chat"
+    },
+    "cloudflare/@cf/meta/llama-2-7b-chat-int8": {
+        "max_tokens": 2048,
+        "max_input_tokens": 2048,
+        "max_output_tokens": 2048,
+        "input_cost_per_token": 1.923e-06,
+        "output_cost_per_token": 1.923e-06,
+        "litellm_provider": "cloudflare",
+        "mode": "chat"
+    },
+    "cloudflare/@cf/mistral/mistral-7b-instruct-v0.1": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 1.923e-06,
+        "output_cost_per_token": 1.923e-06,
+        "litellm_provider": "cloudflare",
+        "mode": "chat"
+    },
+    "cloudflare/@hf/thebloke/codellama-7b-instruct-awq": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 1.923e-06,
+        "output_cost_per_token": 1.923e-06,
+        "litellm_provider": "cloudflare",
+        "mode": "chat"
+    },
+    "voyage/voyage-01": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "voyage",
+        "mode": "embedding"
+    },
+    "voyage/voyage-lite-01": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "voyage",
+        "mode": "embedding"
+    },
+    "voyage/voyage-large-2": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "input_cost_per_token": 1.2e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "voyage",
+        "mode": "embedding"
+    },
+    "voyage/voyage-law-2": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "input_cost_per_token": 1.2e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "voyage",
+        "mode": "embedding"
+    },
+    "voyage/voyage-code-2": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "input_cost_per_token": 1.2e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "voyage",
+        "mode": "embedding"
+    },
+    "voyage/voyage-2": {
+        "max_tokens": 4000,
+        "max_input_tokens": 4000,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "voyage",
+        "mode": "embedding"
+    },
+    "voyage/voyage-lite-02-instruct": {
+        "max_tokens": 4000,
+        "max_input_tokens": 4000,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "voyage",
+        "mode": "embedding"
+    },
+    "voyage/voyage-finance-2": {
+        "max_tokens": 4000,
+        "max_input_tokens": 4000,
+        "input_cost_per_token": 1.2e-07,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "voyage",
+        "mode": "embedding"
+    },
+    "databricks/databricks-meta-llama-3-1-405b-instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 5e-06,
+        "input_dbu_cost_per_token": 7.1429e-05,
+        "output_cost_per_token": 1.500002e-05,
+        "output_dbu_cost_per_token": 0.000214286,
+        "litellm_provider": "databricks",
+        "mode": "chat",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {
+            "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."
+        }
+    },
+    "databricks/databricks-meta-llama-3-1-70b-instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 1.00002e-06,
+        "input_dbu_cost_per_token": 1.4286e-05,
+        "output_cost_per_token": 2.99999e-06,
+        "output_dbu_cost_per_token": 4.2857e-05,
+        "litellm_provider": "databricks",
+        "mode": "chat",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {
+            "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."
+        }
+    },
+    "databricks/databricks-dbrx-instruct": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 7.4998e-07,
+        "input_dbu_cost_per_token": 1.0714e-05,
+        "output_cost_per_token": 2.24901e-06,
+        "output_dbu_cost_per_token": 3.2143e-05,
+        "litellm_provider": "databricks",
+        "mode": "chat",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {
+            "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."
+        }
+    },
+    "databricks/databricks-meta-llama-3-70b-instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 1.00002e-06,
+        "input_dbu_cost_per_token": 1.4286e-05,
+        "output_cost_per_token": 2.99999e-06,
+        "output_dbu_cost_per_token": 4.2857e-05,
+        "litellm_provider": "databricks",
+        "mode": "chat",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {
+            "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."
+        }
+    },
+    "databricks/databricks-llama-2-70b-chat": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5.0001e-07,
+        "input_dbu_cost_per_token": 7.143e-06,
+        "output_cost_per_token": 1.5e-06,
+        "output_dbu_cost_per_token": 2.1429e-05,
+        "litellm_provider": "databricks",
+        "mode": "chat",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {
+            "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."
+        }
+    },
+    "databricks/databricks-mixtral-8x7b-instruct": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5.0001e-07,
+        "input_dbu_cost_per_token": 7.143e-06,
+        "output_cost_per_token": 9.9902e-07,
+        "output_dbu_cost_per_token": 1.4286e-05,
+        "litellm_provider": "databricks",
+        "mode": "chat",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {
+            "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."
+        }
+    },
+    "databricks/databricks-mpt-30b-instruct": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 9.9902e-07,
+        "input_dbu_cost_per_token": 1.4286e-05,
+        "output_cost_per_token": 9.9902e-07,
+        "output_dbu_cost_per_token": 1.4286e-05,
+        "litellm_provider": "databricks",
+        "mode": "chat",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {
+            "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."
+        }
+    },
+    "databricks/databricks-mpt-7b-instruct": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 5.0001e-07,
+        "input_dbu_cost_per_token": 7.143e-06,
+        "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
+        "litellm_provider": "databricks",
+        "mode": "chat",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {
+            "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."
+        }
+    },
+    "databricks/databricks-bge-large-en": {
+        "max_tokens": 512,
+        "max_input_tokens": 512,
+        "output_vector_size": 1024,
+        "input_cost_per_token": 1.0003e-07,
+        "input_dbu_cost_per_token": 1.429e-06,
+        "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
+        "litellm_provider": "databricks",
+        "mode": "embedding",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {
+            "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."
+        }
+    },
+    "databricks/databricks-gte-large-en": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "output_vector_size": 1024,
+        "input_cost_per_token": 1.2999e-07,
+        "input_dbu_cost_per_token": 1.857e-06,
+        "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
+        "litellm_provider": "databricks",
+        "mode": "embedding",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {
+            "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."
+        }
+    },
+    "azure/gpt-4o-mini-2024-07-18": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 1.65e-07,
+        "output_cost_per_token": 6.6e-07,
+        "cache_read_input_token_cost": 7.5e-08,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
+    "amazon.titan-embed-image-v1": {
+        "max_tokens": 128,
+        "max_input_tokens": 128,
+        "output_vector_size": 1024,
+        "input_cost_per_token": 8e-07,
+        "input_cost_per_image": 6e-05,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "bedrock",
+        "supports_image_input": true,
+        "mode": "embedding",
+        "source": "https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=amazon.titan-image-generator-v1"
+    }
+}
\ No newline at end of file
diff --git a/llm_api/mongodb_cache.py b/llm_api/mongodb_cache.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d2dbe867f613c0930f56fa43e24d2257a4d392d
--- /dev/null
+++ b/llm_api/mongodb_cache.py
@@ -0,0 +1,127 @@
+import time
+import functools
+from typing import Generator, Any
+from pymongo import MongoClient
+import hashlib
+import json
+import datetime
+import random
+
+from config import ENABLE_MONOGODB, MONOGODB_DB_NAME, ENABLE_MONOGODB_CACHE, CACHE_REPLAY_SPEED, CACHE_REPLAY_MAX_DELAY
+
+from .chat_messages import ChatMessages
+from .mongodb_cost import record_api_cost, check_cost_limits
+from .mongodb_init import mongo_client as client
+
+def create_cache_key(func_name: str, args: tuple, kwargs: dict) -> str:
+    """Build a deterministic cache key for one function call.
+
+    The function name and its arguments are dumped to JSON with sorted keys,
+    and the MD5 hex digest of that text is returned. `args` and `kwargs`
+    must therefore be JSON-serializable.
+    """
+    payload = {'func_name': func_name, 'args': args, 'kwargs': kwargs}
+    serialized = json.dumps(payload, sort_keys=True)
+    digest = hashlib.md5(serialized.encode())
+    return digest.hexdigest()
+
+
+
+def llm_api_cache():
+    """Return a decorator that caches a streaming chat generator in MongoDB.
+
+    When MongoDB is disabled the decorator only strips the `use_cache`
+    keyword. Otherwise every call first checks the cost limits, then either
+    replays a cached stream (if `use_cache` is true, caching is enabled and
+    a record matches) or runs the wrapped generator, records its cost and
+    stores its yields, their delays and its return value.
+    """
+    db_name = MONOGODB_DB_NAME
+    collection_name = 'stream_chat'
+
+    def dummy_decorator(func):
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            # Drop `use_cache` so it is not forwarded to the wrapped function.
+            kwargs.pop('use_cache', None)
+            return func(*args, **kwargs)
+        return wrapper
+
+    if not ENABLE_MONOGODB:
+        return dummy_decorator
+
+    def decorator(func):
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            check_cost_limits()
+
+            # `pop` matters: `use_cache` must not reach `func` or the cache key.
+            use_cache = kwargs.pop('use_cache', True)
+            if not ENABLE_MONOGODB_CACHE:
+                use_cache = False
+
+            collection = client[db_name][collection_name]
+            cache_key = create_cache_key(func.__name__, args, kwargs)
+
+            if use_cache:
+                # `$sample` picks one record at random if several share the key.
+                cached_data = list(collection.aggregate([
+                    {'$match': {'cache_key': cache_key}},
+                    {'$sample': {'size': 1}}
+                ]))
+                cached_data = cached_data[0] if cached_data else None
+                if cached_data:
+                    # Cache hit: replay the recorded stream.
+                    messages = ChatMessages(cached_data['return_value'])
+                    messages.model = args[0]['model']
+                    for item in cached_data['yields']:
+                        scaled_delay = min(item['delay'] / CACHE_REPLAY_SPEED, CACHE_REPLAY_MAX_DELAY)
+                        # Steps without a positive delay are skipped, not yielded.
+                        if scaled_delay <= 0:
+                            continue
+                        time.sleep(scaled_delay)
+                        if item['index'] > 0:
+                            yield messages.prompt_messages + [{'role': 'assistant', 'content': messages.response[:item['index']]}]
+                        else:
+                            yield messages.prompt_messages
+                    messages.finished = True
+                    yield messages
+                    return messages
+
+            # Cache miss or cache bypassed: run the wrapped generator and record it.
+            yields_data = []
+            last_time = time.time()
+            generator = func(*args, **kwargs)
+
+            try:
+                while True:
+                    value = next(generator)
+                    # Take the timestamp after the chunk arrives, so `delay` is
+                    # the wait for this chunk rather than for the previous one.
+                    current_time = time.time()
+                    yields_data.append({
+                        'index': len(value.response),
+                        'delay': current_time - last_time
+                    })
+                    last_time = current_time
+                    yield value
+            except StopIteration as e:
+                return_value = e.value
+                # Record the API cost of this call.
+                record_api_cost(return_value)
+
+                # Store the recorded stream in MongoDB.
+                cache_data = {
+                    'created_at': datetime.datetime.now(),
+                    'return_value': return_value,
+                    'func_name': func.__name__,
+                    'args': args,
+                    'kwargs': kwargs,
+                    'yields': yields_data,
+                    'cache_key': cache_key,
+                }
+                collection.insert_one(cache_data)
+                return return_value
+
+        return wrapper
+    return decorator
diff --git a/llm_api/mongodb_cost.py b/llm_api/mongodb_cost.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4acc01da2763c9f24eda0fff054643518ff9039
--- /dev/null
+++ b/llm_api/mongodb_cost.py
@@ -0,0 +1,121 @@
+import datetime
+
+from config import API_COST_LIMITS, MONOGODB_DB_NAME
+
+from .chat_messages import ChatMessages
+from .mongodb_init import mongo_client as client
+
+def record_api_cost(messages: ChatMessages):
+    """Insert one cost record for `messages` into the `api_cost` collection.
+
+    The record holds the model, cost, currency symbol and token counts; the
+    last message is counted as output and all earlier messages as input.
+    """
+    cost_collection = client[MONOGODB_DB_NAME]['api_cost']
+    cost_collection.insert_one({
+        'created_at': datetime.datetime.now(),
+        'model': messages.model,
+        'cost': messages.cost,
+        'currency_symbol': messages.currency_symbol,
+        'input_tokens': messages[:-1].count_message_tokens(),
+        'output_tokens': messages[-1:].count_message_tokens(),
+        'total_tokens': messages.count_message_tokens()
+    })
+
+def get_model_cost_stats(start_date: datetime.datetime, end_date: datetime.datetime) -> list:
+    """Aggregate `api_cost` records per model for a time window.
+
+    Records whose `created_at` lies in [start_date, end_date] are grouped by
+    model. Each returned dict has the keys model, total_cost, total_calls,
+    total_input_tokens, total_output_tokens, total_tokens,
+    avg_cost_per_call and currency_symbol; the two cost fields are rounded
+    to 4 decimals and the list is sorted by total_cost, highest first.
+    """
+    in_window = {
+        '$match': {'created_at': {'$gte': start_date, '$lte': end_date}}
+    }
+
+    per_model = {
+        '$group': {
+            '_id': '$model',
+            'total_cost': {'$sum': '$cost'},
+            'total_calls': {'$sum': 1},
+            'total_input_tokens': {'$sum': '$input_tokens'},
+            'total_output_tokens': {'$sum': '$output_tokens'},
+            'total_tokens': {'$sum': '$total_tokens'},
+            'avg_cost_per_call': {'$avg': '$cost'},
+            # Takes the symbol of the first record seen for the model.
+            'currency_symbol': {'$first': '$currency_symbol'}
+        }
+    }
+
+    reshaped = {
+        '$project': {
+            'model': '$_id',
+            'total_cost': {'$round': ['$total_cost', 4]},
+            'total_calls': 1,
+            'total_input_tokens': 1,
+            'total_output_tokens': 1,
+            'total_tokens': 1,
+            'avg_cost_per_call': {'$round': ['$avg_cost_per_call', 4]},
+            'currency_symbol': 1,
+            '_id': 0
+        }
+    }
+
+    by_cost_desc = {'$sort': {'total_cost': -1}}
+
+    # Query the api_cost collection directly.
+    collection = client[MONOGODB_DB_NAME]['api_cost']
+    pipeline = [in_window, per_model, reshaped, by_cost_desc]
+    return list(collection.aggregate(pipeline))
+
+# Usage example:
+def print_cost_report(days: int = 30, hours: int = 0):
+    """Print a per-model cost report for the last `days` days plus `hours` hours."""
+    window_end = datetime.datetime.now()
+    window_start = window_end - datetime.timedelta(days=days, hours=hours)
+    report_rows = get_model_cost_stats(window_start, window_end)
+
+    # The header shows calendar dates only, even for sub-day windows.
+    print(f"\n=== API Cost Report ({window_start.date()} to {window_end.date()}) ===")
+    for row in report_rows:
+        print(f"\nModel: {row['model']}")
+        print(f"Total Cost: {row['currency_symbol']}{row['total_cost']:.4f}")
+        print(f"Total Calls: {row['total_calls']}")
+        print(f"Total Tokens: {row['total_tokens']:,}")
+        print(f"Avg Cost/Call: {row['currency_symbol']}{row['avg_cost_per_call']:.4f}")
+
+def check_cost_limits() -> bool:
+    """
+    Enforce the hourly and daily API spending limits.
+
+    Sums the cost recorded over the last hour and the last 24 hours,
+    converting '$' amounts to RMB with the configured exchange rate.
+    Returns True when both totals are below their limits; prints a warning
+    and raises Exception when either limit is reached or exceeded.
+    """
+    def total_in_rmb(stats):
+        # Any currency symbol other than '$' is counted as RMB already.
+        return sum(
+            stat['total_cost'] * (API_COST_LIMITS['USD_TO_RMB_RATE'] if stat['currency_symbol'] == '$' else 1)
+            for stat in stats
+        )
+
+    # Both windows end at the same instant.
+    now = datetime.datetime.now()
+    hour_stats = get_model_cost_stats(now - datetime.timedelta(hours=1), now)
+    day_stats = get_model_cost_stats(now - datetime.timedelta(days=1), now)
+    hour_total_rmb = total_in_rmb(hour_stats)
+    day_total_rmb = total_in_rmb(day_stats)
+
+    # The hourly limit is checked first; reaching a limit counts as exceeding it.
+    if hour_total_rmb >= API_COST_LIMITS['HOURLY_LIMIT_RMB']:
+        print(f"警告：最近1小时API费用（￥{hour_total_rmb:.2f}）超过限制（￥{API_COST_LIMITS['HOURLY_LIMIT_RMB']}）")
+        raise Exception("最近1小时内API调用费用超过设定上限！")
+
+    if day_total_rmb >= API_COST_LIMITS['DAILY_LIMIT_RMB']:
+        print(f"警告：最近24小时API费用（￥{day_total_rmb:.2f}）超过限制（￥{API_COST_LIMITS['DAILY_LIMIT_RMB']}）")
+        raise Exception("最近1天内API调用费用超过设定上限！")
+
+    return True
\ No newline at end of file
diff --git a/llm_api/mongodb_init.py b/llm_api/mongodb_init.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a5f44aba59590d80b8af8843a49318b7af2db77
--- /dev/null
+++ b/llm_api/mongodb_init.py
@@ -0,0 +1,7 @@
+import os
+from config import ENABLE_MONOGODB
+from pymongo import MongoClient
+
+# Take the MongoDB URI from the MONGODB_URI environment variable, else use the local default.
+mongo_uri = os.environ.get('MONGODB_URI', 'mongodb://localhost:27017/')
+mongo_client = None if not ENABLE_MONOGODB else MongoClient(mongo_uri)
diff --git a/llm_api/openai_api.py b/llm_api/openai_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..c52ac36ab6252ea2478542336358d0073081d8bb
--- /dev/null
+++ b/llm_api/openai_api.py
@@ -0,0 +1,67 @@
+import httpx
+from openai import OpenAI
+from .chat_messages import ChatMessages
+
+# Pricing reference: https://openai.com/api/pricing/
+#
+# Maps each model name to its "Pricing" pair and its "currency_symbol".
+# NOTE(review): the pairs look like (input, output) USD prices per 1K tokens,
+# i.e. the list price per 1M tokens divided by 1000 -- confirm with the caller.
+#
+# The inner table holds the undivided prices; the comprehension divides them.
+gpt_model_config = {
+    model_name: {
+        "Pricing": (input_price / 1000, output_price / 1000),
+        "currency_symbol": '$',
+    }
+    for model_name, (input_price, output_price) in (
+        ("gpt-4o", (2.50, 10.00)),
+        ("gpt-4o-mini", (0.15, 0.60)),
+        ("o1-preview", (15, 60)),
+        ("o1-mini", (3, 12)),
+    )
+}
+# https://platform.openai.com/docs/guides/reasoning
+
+def stream_chat_with_gpt(messages, model='gpt-3.5-turbo-1106', response_json=False, api_key=None, base_url=None, max_tokens=4_096, n=1, proxies=None):
+    """Stream a chat completion, yielding `messages` as the reply grows.
+
+    `messages` is mutated in place: an assistant entry is appended and its
+    content refreshed per chunk (a list of `n` strings when n > 1). A leading
+    system message is rewritten for 'o1-preview'. Raises if `api_key` is None.
+    """
+    if api_key is None:
+        raise Exception('未提供有效的 api_key！')
+
+    client_params = {"api_key": api_key}
+    if base_url:
+        client_params['base_url'] = base_url
+    if proxies:
+        client_params["http_client"] = httpx.Client(proxy=proxies)
+    client = OpenAI(**client_params)
+    try:
+        if model in ['o1-preview', ] and messages[0]['role'] == 'system':
+            messages[0:1] = [{'role': 'user', 'content': messages[0]['content']}, {'role': 'assistant', 'content': ''}]
+        chatstream = client.chat.completions.create(
+            stream=True,
+            model=model,
+            messages=messages,
+            max_tokens=max_tokens,
+            response_format={ "type": "json_object" } if response_json else None,
+            n=n
+        )
+        messages.append({'role': 'assistant', 'content': ''})
+        content = ['' for _ in range(n)]
+        for part in chatstream:
+            for choice in part.choices:
+                content[choice.index] += choice.delta.content or ''
+                messages[-1]['content'] = content if n > 1 else content[0]
+                yield messages
+    finally:
+        # Close the client and its HTTP connection pool, even if the consumer stops early.
+        client.close()
+    return messages
+
+    
+if __name__ == '__main__':
+    pass  # Intentionally empty: running this module directly does nothing.
diff --git a/llm_api/sparkai_api.py b/llm_api/sparkai_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..9af137f3be07fa062762661c01f565bff9b7a143
--- /dev/null
+++ b/llm_api/sparkai_api.py
@@ -0,0 +1,66 @@
+import os
+
+from sparkai.llm.llm import ChatSparkLLM, ChunkPrintHandler
+from sparkai.core.messages import ChatMessage as SparkMessage
+
+# Request URL of the Spark 4.0 Ultra model; URLs of other versions: https://www.xfyun.cn/doc/spark/Web.html
+SPARKAI_URL = 'wss://spark-api.xf-yun.com/v4.0/chat'
+# Spark credentials, issued in the iFlytek console (https://console.xfyun.cn/services/bm35).
+# SECURITY: they are read from the environment and must never be committed; the
+# values that used to be hard-coded here are exposed and should be revoked.
+SPARKAI_APP_ID = os.environ.get('SPARKAI_APP_ID', '')
+SPARKAI_API_SECRET = os.environ.get('SPARKAI_API_SECRET', '')
+SPARKAI_API_KEY = os.environ.get('SPARKAI_API_KEY', '')
+# `domain` value that goes with SPARKAI_URL (other versions are listed in the table below)
+SPARKAI_DOMAIN = '4.0Ultra'
+
+"""
+domain values:
+lite -> Lite version;
+generalv3 -> Pro version;
+pro-128k -> Pro-128K version;
+generalv3.5 -> Max version;
+max-32k -> Max-32K version;
+4.0Ultra -> 4.0 Ultra version;
+
+Spark 4.0 Ultra request URL (domain 4.0Ultra):
+wss://spark-api.xf-yun.com/v4.0/chat
+Spark Max-32K request URL (domain max-32k):
+wss://spark-api.xf-yun.com/chat/max-32k
+Spark Max request URL (domain generalv3.5):
+wss://spark-api.xf-yun.com/v3.5/chat
+Spark Pro-128K request URL (domain pro-128k):
+wss://spark-api.xf-yun.com/chat/pro-128k
+Spark Pro request URL (domain generalv3):
+wss://spark-api.xf-yun.com/v3.1/chat
+Spark Lite request URL (domain lite):
+wss://spark-api.xf-yun.com/v1.1/chat
+"""
+
+
+sparkai_model_config = {
+    "spark-4.0-ultra": {
+        "Pricing": (0, 0),
+        "currency_symbol": '￥',
+        "url": "wss://spark-api.xf-yun.com/v4.0/chat",
+        "domain": "4.0Ultra"
+    }
+}
+
+
+if __name__ == '__main__':
+    # Manual smoke test: stream one greeting and print every chunk received.
+    spark = ChatSparkLLM(
+        spark_api_url=SPARKAI_URL,
+        spark_app_id=SPARKAI_APP_ID,
+        spark_api_key=SPARKAI_API_KEY,
+        spark_api_secret=SPARKAI_API_SECRET,
+        spark_llm_domain=SPARKAI_DOMAIN,
+        streaming=True,
+    )
+    messages = [SparkMessage(
+        role="user",
+        content='你好呀'
+    )]
+    for message in spark.stream(messages):
+        print(message)
\ No newline at end of file
diff --git a/llm_api/zhipuai_api.py b/llm_api/zhipuai_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..457fce786b55c3b24e73a3b644e724c17fcb3043
--- /dev/null
+++ b/llm_api/zhipuai_api.py
@@ -0,0 +1,54 @@
+from zhipuai import ZhipuAI
+from .chat_messages import ChatMessages
+
+# Pricing
+# https://open.bigmodel.cn/pricing
+# GLM-4-Plus 0.05￥/1000 tokens, GLM-4-Air 0.001￥/1000 tokens, GLM-4-FlashX 0.0001￥/1000 tokens, GLM-4-Flash 0￥/1000 tokens
+
+# Models
+# https://bigmodel.cn/dev/howuse/model
+# glm-4-plus、glm-4-air、 glm-4-flashx 、 glm-4-flash
+
+
+
+zhipuai_model_config = {  # per-model pricing metadata, keyed by model name
+    "glm-4-plus": {
+        "Pricing": (0.05, 0.05),  # two rates; presumably (input, output) per 1000 tokens - TODO confirm
+        "currency_symbol": '￥',
+    },
+    "glm-4-air": {
+        "Pricing": (0.001, 0.001),
+        "currency_symbol": '￥',
+    },
+    "glm-4-flashx": {
+        "Pricing": (0.0001, 0.0001),
+        "currency_symbol": '￥',
+    },
+    "glm-4-flash": {
+        "Pricing": (0, 0),  # both rates are zero for this model
+        "currency_symbol": '￥',
+    },
+}
+
+def stream_chat_with_zhipuai(messages, model='glm-4-flash', response_json=False, api_key=None, max_tokens=4_096):
+    """Stream a ZhipuAI chat completion, yielding `messages` after every chunk.
+
+    `messages` is mutated in place: an assistant message is appended and grown.
+    `response_json` is accepted but currently not used by this function.
+    Raises Exception when `api_key` is missing or empty.
+    """
+    if not api_key:  # an unset key may arrive as '' rather than None
+        raise Exception('未提供有效的 api_key！')
+
+    client = ZhipuAI(api_key=api_key)
+    response = client.chat.completions.create(model=model, messages=messages, stream=True, max_tokens=max_tokens)
+
+    messages.append({'role': 'assistant', 'content': ''})
+    for chunk in response:
+        messages[-1]['content'] += chunk.choices[0].delta.content or ''
+        yield messages
+
+    return messages
+
+if __name__ == '__main__':
+    pass  # placeholder: running this module directly does nothing
\ No newline at end of file
diff --git a/prompts/baseprompt.py b/prompts/baseprompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..12c9bbf70032837fbceefa6e47a39e25123a31a9
--- /dev/null
+++ b/prompts/baseprompt.py
@@ -0,0 +1,105 @@
+import os
+import re
+from prompts.chat_utils import chat, log
+from prompts.pf_parse_chat import parse_chat
+from prompts.prompt_utils import load_text, match_code_block
+
+def parser(response_msgs):
+    """Return the text of the response's fenced code blocks, joined by newlines.
+
+    Falls back to the raw response when there are no blocks or they are all blank.
+    """
+    raw = response_msgs.response
+    joined = "\n".join(match_code_block(raw) or [])
+    return joined if joined.strip() else raw
+
+
+def clean_txt_content(content):
+    """Drop the lines that start with '//' (comments), then strip the remaining text.
+
+    A '//' preceded by indentation or other text does not make a line a comment.
+    """
+    kept = [line for line in content.split('\n') if not line.startswith('//')]
+    return '\n'.join(kept).strip()
+
+
+def load_prompt(dirname, name):
+    """Read the prompt file `<dirname>/<name>.txt` and return its text."""
+    return load_text(
+        os.path.join(dirname, f"{name}.txt")
+    )
+
+def parse_prompt(text, **kwargs):
+    """Fill the `{key}` placeholders of a prompt template and parse it into chat messages.
+
+    Every placeholder value is wrapped in a ``` fence. A placeholder whose key
+    is absent from kwargs, or whose value is falsy, marks its user turn and
+    the assistant turn right after it for removal. Surplus kwargs are ignored.
+    """
+    template = clean_txt_content(text)
+    placeholder_names = set(re.findall(r'\{(\w+)\}', template))
+    if placeholder_names:
+        fenced = {key: f"```\n{(kwargs.get(key) or '__delete__').strip()}\n```" for key in placeholder_names}
+        template = template.format(**fenced)
+
+    messages = parse_chat(template)
+    # Walk backwards so dropping a pair never shifts a yet-unvisited index.
+    for idx in reversed(range(len(messages) - 1)):
+        if '__delete__' not in messages[idx]['content']:
+            continue
+        assert messages[idx]['role'] == 'user' and messages[idx+1]['role'] == 'assistant', "__delete__ must be in user's message"
+        messages.pop(idx)
+        messages.pop(idx)
+
+    return messages
+
+
+def parse_input_keys(text):
+    """Extract the key names declared on the first `// 输入：` comment line of `text`.
+
+    Keys are separated by ASCII commas and blank entries are dropped; returns []
+    when no such comment line exists.
+    """
+    found = re.search(r'//\s*输入：(.*?)(?:\n|$)', text)
+    if found is None:
+        return []
+    pieces = (piece.strip() for piece in found.group(1).strip().split(','))
+    return [piece for piece in pieces if piece]
+
+def main(model, dirname, user_prompt_text, **kwargs):
+    """Assemble system + context + user prompts from `dirname` and stream the model's reply.
+
+    `user_prompt_text` is either the name of a prompt file under `dirname` or
+    inline prompt text (a leading "user:" header is added when it has none).
+    Yields {'text', 'response_msgs'} per streamed update and returns the last
+    one, or None when the model streams nothing.
+    """
+    system_prompt = parse_prompt(load_prompt(dirname, "system_prompt"), **kwargs)
+
+    # NOTE(review): existence is tested without the ".txt" suffix that
+    # load_prompt appends - confirm the two paths are meant to differ.
+    load_from_file_flag = os.path.exists(os.path.join(dirname, user_prompt_text))
+    if load_from_file_flag:
+        user_prompt_text = load_prompt(dirname, user_prompt_text)
+    elif not re.search(r'^user:\n', user_prompt_text, re.MULTILINE):
+        user_prompt_text = f"user:\n{user_prompt_text}"
+    user_prompt = parse_prompt(user_prompt_text, **kwargs)
+
+    context_input_keys = parse_input_keys(user_prompt_text)
+    if not context_input_keys:
+        assert not load_from_file_flag, "从本地文件加载Prompt时，本地文件中注释必须指明输入！"
+        context_kwargs = kwargs
+    else:
+        context_kwargs = {k: kwargs[k] for k in context_input_keys}
+        assert all(context_kwargs.values()), "Missing required context keys"
+    context_prompt = parse_prompt(load_prompt(dirname, "context_prompt"), **context_kwargs)
+
+    final_prompt = system_prompt + context_prompt + user_prompt
+    ret = None  # stays None if chat() yields nothing (previously an UnboundLocalError)
+    for response_msgs in chat(final_prompt, None, model, parse_chat=False):
+        ret = {'text': parser(response_msgs), 'response_msgs': response_msgs}
+        yield ret
+    return ret
+
+
+
diff --git a/prompts/chat_utils.py b/prompts/chat_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..760332b910295b98fd19b359fb1f73e3f4e32e76
--- /dev/null
+++ b/prompts/chat_utils.py
@@ -0,0 +1,40 @@
+import os
+from .pf_parse_chat import parse_chat as pf_parse_chat
+
+from llm_api import ModelConfig, stream_chat
+from datetime import datetime  # Update this import
+import random
+
+
+def chat(messages, prompt, model:ModelConfig, parse_chat=False, response_json=False):
+    """Stream a model reply by delegating to stream_chat, and return its final result.
+
+    With a non-empty `prompt`: when `parse_chat` is set the conversation is
+    parsed from `prompt` (replacing `messages`); otherwise `prompt` is added
+    as a user turn via `messages + [...]`.
+    """
+    if prompt:
+        messages = pf_parse_chat(prompt) if parse_chat else messages + [{'role': 'user', 'content': prompt}]
+    return (yield from stream_chat(model, messages, response_json=response_json))
+    
+
+def log(prompt_name, prompt, parsed_result):
+    """Dump a prompt, the model response and the parsed fields to a new file under ./output.
+
+    `parsed_result['response_msgs']` must expose a `.response` string; every other
+    entry of `parsed_result` is listed in the parse section.
+    """
+    out_dir = os.path.join(os.path.dirname(__file__), 'output')
+    os.makedirs(out_dir, exist_ok=True)
+    # Timestamp plus a random 4-digit suffix makes file-name clashes unlikely.
+    suffix = random.randint(1000, 9999)
+    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+    target = os.path.join(out_dir, stamp + f"_{prompt_name}_{suffix}.txt")
+    reply = parsed_result['response_msgs'].response
+    with open(target, 'w', encoding='utf-8') as out:
+        out.write("----------prompt--------------\n")
+        out.write(prompt + "\n\n")
+        out.write("----------response-------------\n")
+        out.write(reply + "\n\n")
+        out.write("-----------parse----------------\n")
+        out.writelines(f"{k}:\n{v}\n\n" for k, v in parsed_result.items() if k != 'response_msgs')
diff --git a/prompts/common_parser.py b/prompts/common_parser.py
new file mode 100644
index 0000000000000000000000000000000000000000..c2aa5b96efd7abed632ba4522df287766d740356
--- /dev/null
+++ b/prompts/common_parser.py
@@ -0,0 +1,21 @@
+def parse_content(response_msgs):
+    """Return the 'content' field of the last message in `response_msgs`."""
+    return response_msgs[-1]['content']
+
+def parse_last_code_block(response_msgs):
+    """Return the last fenced code block of the response, or the whole response if it has none."""
+    from prompts.prompt_utils import match_code_block
+
+    raw = response_msgs.response
+    found = match_code_block(raw)
+    return found[-1] if found else raw
+
+def parse_named_chunk(response_msgs, name):
+    """Return the `### <name>` section of the last message, or its whole content if absent.
+
+    Sections are produced by parse_chunks_by_separators with the title pattern `\\S*`.
+    """
+    from prompts.prompt_utils import parse_chunks_by_separators
+    content = response_msgs[-1]['content']
+    sections = parse_chunks_by_separators(content, [r'\S*', ])
+    return sections[name] if name in sections else content
diff --git a/prompts/idea-examples.yaml b/prompts/idea-examples.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c16ffceda75a98f7942c656b6f80266c8b094c2d
--- /dev/null
+++ b/prompts/idea-examples.yaml
@@ -0,0 +1,9 @@
+examples:
+  - idea: |-
+      随身携带王者荣耀召唤器，每天随机英雄三分钟附体！
+  - idea: |-
+      商业大亨穿越到哈利波特世界，但我不会魔法
+  - idea: |-
+      末日重生归来，我抢先把所有反派关在了地牢里
+  - idea: |-
+      身为高中生兼当红轻小说作家的我，正被年纪比我小且从事声优工作的女同学掐住脖子。
diff --git a/prompts/pf_parse_chat.py b/prompts/pf_parse_chat.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e135aae6cfa1191d1c33c8d25cd5cf32f9e20cf
--- /dev/null
+++ b/prompts/pf_parse_chat.py
@@ -0,0 +1,94 @@
+import functools
+import json
+import re
+import sys
+import time
+from typing import List, Mapping
+
+from jinja2 import Template
+
+
+def validate_role(role: str, valid_roles: List[str] = None):
+    """Raise ValueError unless `role` is in `valid_roles` (default: assistant, function, user, system)."""
+    allowed = valid_roles or ["assistant", "function", "user", "system"]
+    if role in allowed:
+        return
+    listing = ','.join(f"'{r}:\\n'" for r in allowed)
+    raise ValueError(f"Invalid role: {role}. Valid roles are: {listing}")
+
+
+def try_parse_name_and_content(role_prompt):
+    """Return (name, content) when the chunk has `name:` / `content:` headers, otherwise None."""
+    # Either header may carry an optional '##' markdown prefix; the bare form
+    # is still accepted for backward compatibility.
+    header_re = r"\n*#{0,2}\s*name:\n+\s*(\S+)\s*\n*#{0,2}\s*content:\n?(.*)"
+    found = re.search(header_re, role_prompt, re.DOTALL)
+
+    return (found.group(1), found.group(2)) if found else None
+
+
+def parse_chat(chat_str, images: List = None, valid_roles: List[str] = None):
+    """Parse role-delimited prompt text into a list of chat message dicts.
+
+    Each role header line (`<role>:`, case-insensitive) opens a message and the
+    text up to the next header becomes its content. Raises ValueError for
+    non-blank text before the first header that is not a valid role, and for a
+    function message that lacks the `name:` / `content:` layout.
+    """
+    if not valid_roles:
+        valid_roles = ["system", "user", "assistant", "function"]
+    # The OpenAI chat API only supports the default roles above. A single '#' may
+    # precede the role name for markdown highlighting; the bare form still works.
+    separator = r"(?i)^\s*#?\s*(" + "|".join(valid_roles) + r")\s*:\s*\n"
+    hash2images = {str(x): x for x in (images or [])}
+
+    chat_list = []
+    # re.split keeps the captured role names, so role and content chunks alternate.
+    for chunk in re.split(separator, chat_str, flags=re.MULTILINE):
+        pending = chat_list[-1] if chat_list else None
+        if not (pending and "role" in pending and "content" not in pending):
+            # No message is waiting for content, so this chunk must name a role.
+            if chunk.strip() == "":
+                continue
+            role = chunk.strip().lower()
+            validate_role(role, valid_roles=valid_roles)
+            chat_list.append({"role": role})
+            continue
+
+        named = try_parse_name_and_content(chunk)
+        if named is not None:
+            pending["name"] = named[0]
+            pending["content"] = to_content_str_or_list(named[1], hash2images)
+        elif pending["role"] == "function":
+            # "name" is required for the function role and optional for the others.
+            raise ValueError("Function role must have content.")
+        else:
+            pending["content"] = to_content_str_or_list(chunk, hash2images)
+
+    return chat_list
+
+
+def to_content_str_or_list(chat_str: str, hash2images: Mapping):
+    """Return the stripped text, or a list of text/image parts if any line names an image.
+
+    A line whose stripped form is a key of `hash2images` becomes an
+    `image_url` part (the image's `source_url` when truthy, else a base64 data
+    URL); blank lines are skipped and every other line becomes a text part.
+    """
+    chat_str = chat_str.strip()
+    parts, saw_image = [], False
+    for line in chat_str.split("\n"):
+        key = line.strip()
+        if key in hash2images:
+            image = hash2images[key]
+            image_url = getattr(image, "source_url", None)
+            if not image_url:
+                encoded = image.to_base64()
+                mime_type = image._mime_type
+                image_url = {"url": f"data:{mime_type};base64,{encoded}"}
+            parts.append({"type": "image_url", "image_url": image_url})
+            saw_image = True
+        elif key != "":
+            parts.append({"type": "text", "text": line})
+    return parts if saw_image else chat_str
+
diff --git a/prompts/prompt_utils.py b/prompts/prompt_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d5452afd3813543b84e2330edc8252a79514d58
--- /dev/null
+++ b/prompts/prompt_utils.py
@@ -0,0 +1,128 @@
+import difflib
+import json
+import yaml
+import chardet
+from jinja2 import Environment, FileSystemLoader  
+
+import re
+import sys, os
+root_path = os.path.abspath(os.path.join(os.path.abspath(__file__), "../.."))
+if root_path not in sys.path:
+    sys.path.append(root_path)
+
+from llm_api.chat_messages import ChatMessages
+
+def can_parse_json(response):
+    """Return True when json.loads accepts `response`; False on malformed or non-string input."""
+    try:
+        json.loads(response)
+        return True
+    except (TypeError, ValueError, RecursionError):  # narrowed from a bare except; JSONDecodeError is a ValueError
+        return False
+
+def match_first_json_block(response):
+    """Return the JSON text of `response`: the response itself, or its first fenced block.
+
+    Prefers ```json fences and falls back to bare ``` fences. Raises Exception
+    when no fence is found or the first fenced block is not valid JSON.
+    """
+    if can_parse_json(response):
+        return response
+    padded = '\n' + response + '\n'  # lets the look-arounds match fences at either end
+    for pattern in (r"(?<=[\r\n])```json(.*?)```(?=[\r\n])", r"(?<=[\r\n])```(.*?)```(?=[\r\n])"):
+        matches = re.findall(pattern, padded, re.DOTALL)
+        if matches:
+            break
+    if not matches:
+        raise Exception(f"没有匹配到JSON代码块")
+
+    first = matches[0]
+    # When generation was continued, a stray "\r\n" between the parts can break parsing.
+    for candidate in (first, first.replace('\r\n', '')):
+        if can_parse_json(candidate):
+            return candidate
+    raise Exception(f"无法解析JSON代码块")
+    
+def parse_first_json_block(response_msgs: ChatMessages):
+    """Parse the JSON found in the last message; asserts that this message has the 'assistant' role."""
+    assert response_msgs[-1]['role'] == 'assistant'
+    return json.loads(match_first_json_block(response_msgs[-1]['content']))
+def match_code_block(response):
+    """Return the bodies of all ``` fenced blocks in `response`; a final unclosed block counts too."""
+    # Normalise CRLF and lone CR line endings to LF before matching.
+    normalized = response.replace('\r\n', '\n').replace('\r', '\n')
+    # The appended fence closes a trailing block that was left open.
+    return re.findall(r"```(?:\S*\s)(.*?)```", normalized + '```', re.DOTALL)
+
+def json_dumps(json_object):
+    """Serialize `json_object` to JSON text, keeping non-ASCII characters and indenting by one space."""
+    return json.dumps(json_object, ensure_ascii=False, indent=1)
+def parse_chunks_by_separators(string, separators):
+    """Split `string` into {title: body} at header lines of the form `### <title>`.
+
+    `separators` are regex alternatives for the accepted titles. Bodies are
+    stripped; text before the first header is dropped, and a repeated title
+    keeps only its last section.
+    """
+    header_re = r"^\s*###\s*(" + "|".join(separators) + r")\s*\n"
+    pieces = re.split(header_re, string, flags=re.MULTILINE)
+
+    # re.split returns [preamble, title1, body1, title2, body2, ...].
+    sections = {}
+    for title, body in zip(pieces[1::2], pieces[2::2] + [""]):
+        title = title.strip()
+        sections[title] = body.strip() if title else ""
+
+    return sections
+
+def construct_chunks_and_separators(chunk2separator):
+    """Render a {title: body} mapping as `### title` sections separated by blank lines."""
+    return "\n\n".join(f"### {title}\n{body}" for title, body in chunk2separator.items())
+def match_chunk_span_in_text(chunk, text):
+    """Locate `chunk` inside `text` via an element-wise diff and return an (l, r) index pair into `text`, or None."""
+    # NOTE(review): `l` is reassigned on every step while chunk_i == 1 and is never
+    # set when `chunk` is empty (UnboundLocalError) - confirm both are intended.
+    diff = difflib.Differ().compare(chunk, text)
+    chunk_i = 0  # number of `chunk` elements consumed so far
+    text_i = 0  # number of `text` elements consumed so far
+
+    for tag in diff:
+        if tag.startswith(' '):  # element present in both sequences
+            chunk_i += 1
+            text_i += 1
+        elif tag.startswith('+'):  # element only in `text`
+            text_i += 1
+        else:  # any other tag is counted against `chunk`
+            chunk_i += 1
+        if chunk_i == 1:
+            l = text_i - 1
+        if chunk_i == len(chunk):  # all of `chunk` consumed: the span ends here
+            r = text_i
+            return l, r
+
+def load_yaml(file_path):
+    """Read the UTF-8 file at `file_path` and return its content parsed with yaml.safe_load."""
+    with open(file_path, 'r', encoding='utf-8') as file:
+        return yaml.safe_load(file)
+def load_text(file_path, read_size=None):
+    """Read `file_path` as bytes and decode it with the encoding chardet detects.
+
+    At most `read_size` bytes are read (everything when None); detection looks
+    at the first 10000 bytes. Undecodable bytes are dropped (errors='ignore').
+    """
+    with open(file_path, 'rb') as file:
+        raw_data = file.read(read_size)
+
+    encoding = chardet.detect(raw_data[:10000])['encoding'] or 'utf-8'
+    try:
+        return raw_data.decode(encoding, errors='ignore')
+    except (LookupError, UnicodeDecodeError):
+        # errors='ignore' hides bad bytes, so what can fail is a codec name Python lacks.
+        return raw_data.decode('utf-8', errors='ignore')
+
+def load_jinja2_template(file_path):
+    """Load the Jinja2 template at `file_path`, using its directory as the loader search path."""
+    directory, filename = os.path.split(file_path)
+    env = Environment(loader=FileSystemLoader(directory))
+    return env.get_template(filename)
+
+
diff --git a/prompts/test_format_plot.yaml b/prompts/test_format_plot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5e49ffc2eacde1a7f851f5f16189d7a6305eba7f
--- /dev/null
+++ b/prompts/test_format_plot.yaml
@@ -0,0 +1,28 @@
+- |-
+  李珣呆立不动，背后传来水声，那雾中的女子在悠闲洗浴。
+  
+  李珣对这种场景感到震惊，认为她绝非普通人，决定乖乖表现。
+  
+  尽管转身，他仍紧闭眼睛，慌乱道歉。
+
+  那女子静默片刻，继续泼水声令李珣难以忍受。
+  
+  她随后淡然问话，李珣感到对方危险可怕。
+  
+  她询问李珣怎么上山，李珣答“爬上来的”，这让对方略显惊讶。
+
+  女子探问他身份，李珣庆幸自己内息如同名门，为保命决定实话实说，自报身份并讲述过往经历，隐去危险细节。
+  
+  这番表白得到女子的肯定，虽然语调淡然，但意思清晰。她让李珣暂时离开，待她穿戴整齐。
+
+  李珣照做，在岸边等候。女子走出雾气，身姿曼妙，令他看呆。
+  
+  铃声伴着她的步伐，让李珣心神为之所牵。
+  
+  当水气散尽，绝美之貌让李珣惊叹不已，几乎想要顶礼膜拜。
+- |-
+  隐隐间，似乎有一丝若有若无的铃声，缓缓地沁入水雾之中，与这迷茫天水交织在一处，细碎的抖颤之声，天衣无缝地和这缓步而来的身影合在一处，攫牢了李珣的心神。
+  而当眼前水气散尽，李珣更是连呼吸都停止了。此为何等佳人？
+  李珣只觉得眼前洁净不沾一尘的娇颜，便如一朵临水自照的水仙，清丽中别有孤傲，闲适中却见轻愁。
+  他还没找到形容眼前佳人的辞句，便已觉得两腿发软，恨不能跪倒地上，顶礼膜拜。
+
diff --git a/prompts/test_prompt.py b/prompts/test_prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb46573c2aea7d7562119db72018afbe2f9d9ba9
--- /dev/null
+++ b/prompts/test_prompt.py
@@ -0,0 +1,22 @@
+import json
+import sys, os
+root_path = os.path.abspath(os.path.join(os.path.abspath(__file__), "../.."))
+sys.path.append(root_path)
+
+from prompts.load_utils import run_prompt
+
+def json_load(input_file):
+    """Load a `.jsonl` file as a list (one JSON value per non-blank line), any other file as one JSON document."""
+    with open(input_file, 'r', encoding='utf-8') as f:
+        if input_file.endswith('.jsonl'):
+            return [json.loads(line) for line in f if line.strip()]  # blank lines are skipped, not parsed
+        return json.load(f)
+
+
+if __name__ == "__main__":
+    # Manual run: feed the first record of the prompt directory's data.jsonl to run_prompt.
+    path = "./prompts/创作正文"
+    kwargs = json_load(os.path.join(path, 'data.jsonl'))[0]
+    gen = run_prompt(source=path, **kwargs)
+    # Exhaust the generator so that everything it yields is actually produced.
+    list(gen)
\ No newline at end of file
diff --git a/prompts/text-plot-examples.yaml b/prompts/text-plot-examples.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4d4b7db050d60d758b2aedbd282fbe7ad893b66d
--- /dev/null
+++ b/prompts/text-plot-examples.yaml
@@ -0,0 +1,227 @@
+prompt: |-
+  逐句简写下面的小说正文。如果原句本来就很少，考虑将原文多个（2-5个）句子简写为一个。
+examples:
+  - title: 青吟
+    text: |-
+      李珣呆立当场，手足无措。
+      后方水声不止，那位雾后佳人并未停下动作，还在那里撩水净身。
+      李珣听得有些傻了，虽然他对异性的认识不算全面，可是像后面这位，能够在男性身旁悠闲沐浴的，是不是也稀少了一些？
+      李珣毕竟不傻，他此时也已然明白，现在面对的是一位绝对惹不起的人物，在这种强势人物眼前，做一个乖孩子，是最聪明不过的了！
+      他虽已背过身来，却还是紧闭眼睛，生怕无意间又冒犯了人家，这无关道德风化，仅仅是为了保住小命而已。
+      确认了一切都已稳妥，他这才结结巴巴地开口：“对……对不住，我不是……故意的！”
+      对方并没有即时回答，李珣只听到哗哗的泼水声，每一点声息，都是对他意志的摧残。
+      也不知过了多久，雾后的女子开口了：“话是真的，却何必故作紧张？事不因人而异，一个聪明人和一个蠢材，要承担的后果都是一样的。”
+      李珣顿时哑口无言。
+      后面这女人，实在太可怕了。
+      略停了一下，这女子又道：“看你修为不济，也御不得剑，是怎么上来这里的？”
+      李珣脱口道：“爬上来的！”
+      “哦？”女子的语气中第一次有了情绪存在，虽只是一丝淡淡的惊讶，却也让李珣颇感自豪。只听她问道：“你是明心剑宗的弟子？”
+      这算是盘问身分了。李珣首先庆幸他此时内息流转的形式，是正宗的明心剑宗嫡传。否则，幽明气一出，恐怕对面之人早一掌劈了他！
+      庆幸中，他的脑子转了几转，将各方面的后果都想了一遍，终是决定“据实”以告。
+      “惭愧，只是个不入流的低辈弟子……”
+      李珣用这句话做缓冲，随即便从自己身世说起，一路说到登峰七年的经历。
+      当然，其中关于血散人的死亡威胁，以及近日方得到的《幽冥录》等，都略去不提。只说是自己一心向道，被淘汰之后，便去爬坐忘峰以证其心云云。
+      这段话本是他在心中温养甚久，准备做为日后说辞使用，虽然从未对人道过，但腹中已是熟练至极。
+      初时开口，虽然还有些辞语上的生涩，但到后来，已是流利无比，许多词汇无需再想，便脱口而出，却是再“真诚”不过。
+      他一开口，说了足足有一刻钟的工夫，这当中，那女子也问了几句细节，却也都在李珣计画之内，回应得也颇为顺畅。
+      如此，待他告一段落之时，那女人竟让他意外地道了一声：“如今竟也有这般人物！”
+      语气虽然还是平平淡淡的，像是在陈述毫不出奇的一件平凡事，但其中意思却是到了。李珣心中暗喜，口中当然还要称谢。
+      女子也不在乎他如何反应，只是又道一声：“你孤身登峰七年，行程二十余万里，能承受这种苦楚，也算是人中之杰。我这样对你，倒是有些不敬，你且左行百步上岸，待我穿戴整齐，再与你相见。”
+      李珣自是依言而行，上了岸去，也不敢多话，只是恭立当场，面上作了十足工夫。
+      也只是比他晚个数息时间，一道人影自雾气中缓缓走来，水烟流动，轻云伴生，虽仍看不清面目，但她凌波微步，长裙摇曳的体态，却已让李珣看呆了眼，只觉得此生再没见过如此人物。
+      隐隐间，似乎有一丝若有若无的铃声，缓缓地沁入水雾之中，与这迷茫天水交织在一处，细碎的抖颤之声，天衣无缝地和这缓步而来的身影合在一处，攫牢了李珣的心神。
+      而当眼前水气散尽，李珣更是连呼吸都停止了。此为何等佳人？
+      李珣只觉得眼前洁净不沾一尘的娇颜，便如一朵临水自照的水仙，清丽中别有孤傲，闲适中却见轻愁。
+      他还没找到形容眼前佳人的辞句，便已觉得两腿发软，恨不能跪倒地上，顶礼膜拜。
+    plot: |-
+      李珣呆立不动，背后传来水声，那雾中的女子在悠闲洗浴。
+      
+      李珣对这种场景感到震惊，认为她绝非普通人，决定乖乖表现。
+      
+      尽管转身，他仍紧闭眼睛，慌乱道歉。
+
+      那女子静默片刻，继续泼水声令李珣难以忍受。
+      
+      她随后淡然问话，李珣感到对方危险可怕。
+      
+      她询问李珣怎么上山，李珣答“爬上来的”，这让对方略显惊讶。
+
+      女子探问他身份，李珣庆幸自己内息如同名门，为保命决定实话实说，自报身份并讲述过往经历，隐去危险细节。
+      
+      这番表白得到女子的肯定，虽然语调淡然，但意思清晰。她让李珣暂时离开，待她穿戴整齐。
+
+      李珣照做，在岸边等候。女子走出雾气，身姿曼妙，令他看呆。
+      
+      铃声伴着她的步伐，让李珣心神为之所牵。
+      
+      当水气散尽，绝美之貌让李珣惊叹不已，几乎想要顶礼膜拜。
+  - title: 纳兰嫣然
+    text: |-
+      云岚宗后山山巅，云雾缭绕，宛如仙境。
+
+      在悬崖边缘处的一块凸出的黑色岩石之上，身着月白色裙袍的女子，正双手结出修炼的印结，闭目修习，而随着其一呼一吸间，形成完美的循环，在每次循环的交替间，周围能量浓郁的空气中都将会渗发出一股股淡淡的青色气流，气流盘旋在女子周身，然后被其源源不断的吸收进身体之内，进行着炼化，收纳……
+
+      当最后一缕青色气流被女子吸进身体之后，她缓缓的睁开双眸，淡淡的青芒从眸子中掠过，披肩的青丝，霎那间无风自动，微微飞扬。
+
+      “纳兰师姐，纳兰肃老爷子来云岚宗了，他说让你去见他。”
+
+      见到女子退出了修炼状态，一名早已经等待在此处的侍女，急忙恭声道。
+
+      “父亲？他来做什么？”
+
+      闻言，女子黛眉微皱，疑惑的摇了摇头，优雅的站起身子，立于悬崖之边，迎面而来的轻风。将那月白裙袍吹得紧紧的贴在女子玲珑娇躯之上，显得凹凸有致，极为诱人。
+
+      目光慵懒的在深不见底的山崖下扫了扫，女子玉手轻拂了拂月白色的裙袍，旋即转身离开了这处她专用的修炼之所。
+
+      宽敞明亮地大厅之中。一名脸色略微有些阴沉地中年人，正端着茶杯。放在桌上的手掌，有些烦躁地不断敲打着桌面。
+
+      纳兰肃现在很烦躁，因为他几乎是被他的父亲纳兰桀用棍子撵上的云岚宗。
+
+      他没想到，他仅仅是率兵去帝国西部驻扎了一年而已。自己这个胆大包天的女儿，竟然就敢私自把当年老爷子亲自定下的婚事给推了。
+
+      家族之中，谁不知道纳兰桀极其要面子。而纳兰嫣然现在的这举动，无疑会让别人说成是他纳兰家看见萧家势力减弱，不屑与之联婚，便毁信弃诺。
+
+      这种闲言碎语，让得纳兰桀每天都在家中暴跳如雷。若不是因为动不了身的缘故。恐怕他早已经拖着那行将就木的身体，来爬云岚山了。
+
+      对于纳兰家族与萧家的婚事。说实在的，其实纳兰肃也并不太赞成。毕竟当初的萧炎，几乎是废物的代名词。让他将自己这容貌与修炼天赋皆是上上之选的女儿嫁给一个废物。纳兰肃心中还真是一百个不情愿。
+
+      不过，当初是当初，根据他所得到的消息，现在萧家的那小子，不仅脱去了废物的名头，而且所展现出来的修炼速度，几乎比他小时候最巅峰的时候还要恐怖。
+
+      此时萧炎所表现而出的潜力，无疑已经能够让得纳兰肃重视。然而，纳兰嫣然的私自举动，却是把双方的关系搞成了冰冷的僵局，这让得纳兰肃极为的尴尬。
+
+      按照这种关系下去，搞不好，他纳兰肃不仅会失去一个潜力无限的女婿，而且说不定还会因此让得他对纳兰家族怀恨在心。
+
+      只要想着一个未来有机会成为斗皇的强者或许会敌视着纳兰家族，纳兰肃在后怕之余，便是气得直跳脚。
+
+      “这丫头。现在胆子是越来越大了……”
+
+      越想越怒，纳兰肃手中的茶杯忽然重重的跺在桌面之上，茶水溅了满桌。将一旁侍候的侍女吓了一跳，赶忙小心翼翼的再次换了一杯。“父亲，您来云岚宗，怎么不通知一下焉儿啊？”
+
+      就在纳兰肃心头发怒之时，女子清脆的声音，忽然地在大厅内响起，月白色的倩影，从纱帘中缓缓行出，对着纳兰肃甜甜笑道。
+
+      “哼，你眼里还有我这个父亲？我以为你成为了云韵的弟子，就不知道什么是纳兰家族了呢！”望着这出落得越来越水灵的女儿，纳兰肃心头的怒火稍稍收敛了一点，冷哼道。
+
+      瞧着纳兰肃不甚好看的脸色，纳兰嫣然无奈地摇了摇头，对着那一旁的侍女挥了挥手，将之遣出。
+
+      “父亲，一年多不见，你一来就训斥焉儿，等下次回去，我可一定要告诉母亲！”待得侍女退出之后，纳兰嫣然顿时皱起了俏鼻，在纳兰肃身旁坐下，撒娇般的哼道。
+
+      “回去？你还敢回去？”闻言，纳兰肃嘴角一裂：“你敢回去，看你爷爷敢不敢打断你的腿……”
+
+      撇了撇嘴，心知肚明的纳兰嫣然，自然清楚纳兰肃话中的意思。
+
+      “你应该知道我来此处的目的吧？”
+
+      狠狠的灌了一口茶水，纳兰肃阴沉着脸道。
+
+      “是为了我悔婚的事吧？”
+
+      纤手把玩着一缕青丝，纳兰嫣然淡淡地道。
+
+      看着纳兰嫣然这平静的模样，纳兰肃顿时被气乐了，手掌重重地拍在桌上，怒声道：“婚事是你爷爷当年亲自允下的，是谁让你去解除的？”
+
+      “那是我的婚事，我才不要按照你们的意思嫁给谁，我的事，我自己会做主！我不管是谁允下的，我只知道，如果按照约定。嫁过去的是我，不是爷爷！”提起这事，纳兰嫣然也是脸现不愉，性子有些独立的她，很讨厌自己的大事按照别人所指定的路线行走。即使这人是她的长辈。
+
+      “你别以为我不知道，你无非是认为萧炎当初一个废物配不上你是吧？可现在人家潜力不会比你低！以你在云岚宗的地位，应该早就接到过有关他实力提升的消息吧？”纳兰肃怒道。
+
+      纳兰嫣然黛眉微皱，脑海中浮现当年那充满着倔性的少年，红唇微抿，淡淡地道：“的确听说过一些关于他的消息，没想到，他竟然还真的能脱去废物的名头，这倒的确让我很意外。”
+
+      “意外？一句意外就行了？你爷爷开口了。让你找个时间，再去一趟乌坦城，最好能道个歉把僵硬的关系弄缓和一些。”纳兰肃皱眉道。
+
+      “道歉？不可能！”
+
+      闻言，纳兰嫣然柳眉一竖，毫不犹豫地直接拒绝，冷哼道：“他萧炎虽然不再是废物，可我纳兰嫣然依然不会嫁给他！更别提让我去道什么歉，你们喜欢，那就自己去，反正我不会再去乌坦城！”
+
+      “这哪有你回绝的余地！祸是你闯的，你必须去给我了结了！”瞧得纳兰嫣然竟然一口回绝，纳兰肃顿时勃然大怒。
+
+      “不去！”
+
+      冷着俏脸，纳兰嫣然扬起雪白的下巴，脸颊上有着一抹与生俱来的娇贵：“他萧炎不是很有本事么？既然当年敢应下三年的约定，那我纳兰嫣然就在云岚宗等着他来挑战，若是我败给他，为奴为婢，随他处置便是，哼，如若不然，想要我道歉。不可能！”
+
+      “混账，如果三年约定，你最后输了，到时候为奴为婢，那岂不是连带着我纳兰家族，也把脸给丢光了？”纳兰肃怒斥道。
+
+      “谁说我会输给他？就算他萧炎回复了天赋，可我纳兰嫣然难道会差了他什么不成？而且云岚宗内高深功法不仅数不胜数，高级斗技更是收藏丰厚，更有丹王古河爷爷帮我炼制丹药。这些东西。他一个小家族的少爷难道也能有？说句不客气的，恐怕光光是寻找高级斗气功法。就能让得他花费好十几年时间！”被纳兰肃这般小瞧，纳兰嫣然顿时犹如被踩到尾巴的母猫一般，她最讨厌的，便是被人说成比不上那曾经被自己万般看不起的废物！
+
+      被女儿当着面这般吵闹，纳兰肃气得吹胡子瞪眼，猛然站起身来，扬起手掌就欲对着纳兰嫣然扇下去。
+
+      “纳兰兄，你可不要乱来啊。”瞧着纳兰肃的动作，一道白影急忙掠了进来，挡在了纳兰嫣然面前。
+
+      “葛叶，你这个混蛋，听说上次去萧家，还是你陪地嫣然？”望着挡在面前的人影，纳兰肃更是怒气暴涨，大怒道。
+
+      尴尬一笑，葛叶苦笑道：“这是宗主的意思，我也没办法。”
+    plot: |-
+      尴尬一笑，葛叶苦笑道：“这是宗主的意思，我也没办法。”
+      
+      云岚宗后山，云雾缭绕。月白裙袍的女子在山巅悬崖边修炼，吸收青色气流。
+
+      当她吸收完最后一缕气流后，睁开双眸，青芒掠过，青丝微动。一名侍女走上前恭敬道：“纳兰师姐，纳兰肃老爷子来了，让你过去见他。”
+
+      女子黛眉微皱，疑惑地站起身，转身离开修炼之所。大厅内，中年人纳兰肃端着茶杯，脸色阴沉，不断敲打桌面。
+
+      纳兰肃被父亲纳兰桀用棍子赶上山来，因为女儿纳兰嫣然私自退了婚约，纳兰家族因此陷入困境。萧炎本是废物，但现在展现出强大潜力，纳兰肃对此很重视，但女儿的举动让他尴尬。
+
+      纳兰嫣然出现，父女二人开战言语。纳兰肃火冒三丈，纳兰嫣然反对重新接触萧炎，认为只有她自己可以决定自己的婚事。
+
+      纳兰肃怒斥，纳兰嫣然强硬回击，表示她不会道歉，只会等待萧炎挑战她。如果她输了，愿意为奴为婢，但她相信自己不会输。
+
+      纳兰肃气愤欲扇女儿耳光，一道白影葛叶及时挡住，纳兰肃更怒，葛叶苦笑解释这是宗主的意思。
+  - title: 极阴老祖
+    text: |-
+      “而且你真以为，你能做得了主吗？老怪物，你也不用躲躲藏藏了，快点现身吧！”中年人阴厉的说道。
+
+      听了这话，韩立等修士吓了一大跳，急忙往四处张望了起来。难道极阴老祖就在这里？
+
+      可是四周仍然平静如常，并没有什么异常出现。这下众修士有些摸不着头脑了，再次往中年人和乌丑望去。
+
+      “你搞什么鬼？我怎么做不了……”乌丑一开始也有些愕然，但话只说了一半时神色一滞，并开始露出了一丝古怪的神色。
+
+      他用这种神色直直的盯着中年人片刻后，诡异的笑了起来。“不错，不错！不愧为我当年最看重的弟子之一，竟然一眼就看出老夫的身份来了。”
+
+      说话之间，乌丑的面容开始模糊扭曲了起来，不一会儿后，就在众人惊诧的目光中，化为了一个同样瘦小，却两眼微眯的丑陋老者。
+
+      这下，韩立等人后背直冒寒气。
+
+      “附身大法！我就知道，你怎会将如此重要的事情交予一个晚辈去做，还是亲自来了。尽管这不是你的本体。”中年人神色紧张的瞅向老者，声音却低缓的说道。
+
+      “乖徒弟，你还真敢和为师动手不成？”新出现的老者嘴唇未动一下，却从腹部发出尖锐之极的声音，刺得众人的耳膜隐隐作痛，所有人都情不自禁的后退了几步。
+
+      “哼！徒弟？当年你对我们打杀任凭一念之间，稍有不从者，甚至还要抽神炼魂，何曾把我们当过徒弟看待！只不过是你的奴隶罢了！而且，你现在只不过施展的是附身之术而已，顶多能发挥三分之一的修为，我有什么可惧的！”中年人森然的说道，随后两手一挥，身前的鬼头凭空巨涨了起来，瞬间变得更加狰狞可怖起来。
+
+      紫灵仙子和韩立等修士，则被这诡异的局面给震住了，一时间神色各异！
+
+      老者听了中年人的话，并没有动怒，反而淡淡的说道：“不错，若是百余年前，你说这话的确没错！凭我三分之一的修为，想要活捉你还真有些困难。但是现在……”
+
+      说到这里时，他露出了一丝尖刻的讥笑之意。
+
+      第四卷 风起海外 第四百零六章 天都尸火
+
+      中年人听了老者的话，眼中神光一缩，露出难以置信的神情。
+
+      “难道你练成了那魔功？”他的声音有些惊惧。
+
+      “你猜出来更好，如果现在乖乖束手就擒的话，我还能放你一条活路。否则后果怎样，不用我说你应该也知道才对。”老者一边说着，一边伸出一只手掌，只听“嗤啦”一声，一团漆黑如墨的火球漂浮在了手心之上。
+
+      “天都尸火！你终于练成了。”中年人的脸色灰白无比，声音发干的说道，竟惊骇的有点嘶哑了。
+
+      见此情景，极阴祖师冷笑了一声，忽然转过头来，对紫灵仙子等人傲然的说道：“你们听好了，本祖师今天心情很好，可以放你们一条活路！只要肯从此归顺极阴岛，你们还可以继续的逍遥自在。但是本祖师下达的命令必须老老实实的完成，否则就是魂飞魄散的下场。现在在这些禁神牌上交出你们三分之一的元神，就可以安然离去了。”说完这话，他另一只手往怀内一摸，掏出了数块漆黑的木牌，冷冷的望着众人。
+
+      韩立和其他的修士听了，面面相觑起来。既没有人蠢到主动上前去接此牌，也没人敢壮起胆子说不接，摄于对方的名头，一时场中鸦雀无声。
+    plot: |-
+      “你以为你能做主吗？老怪物，现身吧！”中年人阴冷道。
+
+      韩立等人吓了一跳，四处张望，但周围平静，他们再次看向中年人和乌丑。
+
+      乌丑开始糊涂，但转而露出怪异表情，说：“不错，你看出了我的身份。”随后，乌丑的面容扭曲，变成一个瘦小丑陋的老者，韩立等人惊恐不已。
+
+      “附身大法！我就知道你会亲自来。”中年人低声说。
+
+      老者发出尖锐声音道：“你敢和我动手？”
+
+      中年人冷笑：“当年你视我们为奴隶。你现在只施展了附身之术，有什么可惧！”随即，鬼头变得更加狰狞。
+
+      老者淡然道：“若百年前你说的对，但现在……”露出讥笑。
+
+      中年人惊恐道：“难道你练成了那魔功？”
+
+      老者冷笑，召出一团漆黑的火球：“天都尸火！现在束手就擒，否则后果自负。”转头对紫灵仙子等人道：“归顺极阴岛，交出三分之一元神，否则魂飞魄散。”掏出数块黑色木牌。
+
+      韩立等人面面相觑，没人敢动，也不敢拒绝，场中一片沉寂。
\ No newline at end of file
diff --git a/prompts/tool_parser.py b/prompts/tool_parser.py
new file mode 100644
index 0000000000000000000000000000000000000000..49f89ae816f8883979eaa7d96fff0758955ad9c4
--- /dev/null
+++ b/prompts/tool_parser.py
@@ -0,0 +1,39 @@
+from promptflow.core import tool
+from enum import Enum
+
+
+class ResponseType(str, Enum):  # str-valued parsing modes checked by parse_response
+    CONTENT = "content"  # return the message content unchanged
+    SEPARATORS = "separators"  # split the content into its `### title` sections
+    CODEBLOCK = "codeblock"  # return the last ``` fenced block of the content
+
+
+import sys, os
+root_path = os.path.abspath(os.path.join(os.path.abspath(__file__), "../.."))
+if root_path not in sys.path:
+    sys.path.append(root_path)
+
+# The inputs section will change based on the arguments of the tool function, after you save the code
+# Adding type to arguments and return value will help the system show the types properly
+# Please update the function name/signature per need
+@tool
+def parse_response(response_msgs, response_type: Enum):
+    """Extract the requested view of the last message's content.
+
+    CONTENT returns the text unchanged, CODEBLOCK the last ``` fenced block
+    (raising Exception when there is none), SEPARATORS a {title: body} dict of
+    its `### title` sections. Any other `response_type` raises Exception.
+    """
+    from prompts.prompt_utils import parse_chunks_by_separators, match_code_block
+
+    content = response_msgs[-1]['content']
+    if response_type == ResponseType.CONTENT:
+        return content
+    if response_type == ResponseType.SEPARATORS:
+        return parse_chunks_by_separators(content, [r'\S*', ])
+    if response_type != ResponseType.CODEBLOCK:
+        raise Exception(f"无效的解析类型：{response_type}")
+    blocks = match_code_block(content)
+    if not blocks:
+        raise Exception("无法解析回答，未包含三引号代码块。")
+    return blocks[-1]
diff --git a/prompts/tool_polish.py b/prompts/tool_polish.py
new file mode 100644
index 0000000000000000000000000000000000000000..9aa2bc1360c8f9347d3fb4f50171e764baadbffe
--- /dev/null
+++ b/prompts/tool_polish.py
@@ -0,0 +1,23 @@
+from os import path
+from promptflow.core import tool, load_flow
+
+import sys, os
+root_path = os.path.abspath(os.path.join(os.path.abspath(__file__), "../.."))
+if root_path not in sys.path:
+    sys.path.append(root_path)
+
+
+@tool
+def polish(messages, context, model, config, text):
+    """Run the promptflow flow stored in the `polish` directory next to this module.
+
+    The arguments are forwarded to the flow by keyword (`messages` as
+    `chat_messages`); the flow's result is returned unchanged.
+    """
+    flow_dir = path.join(path.dirname(path.abspath(__file__)), "./polish")
+    run_flow = load_flow(source=flow_dir)
+    flow_inputs = dict(chat_messages=messages, context=context, model=model, config=config, text=text)
+
+    return run_flow(**flow_inputs)
+
+ 
\ No newline at end of file
diff --git "a/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/context_prompt.txt" "b/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/context_prompt.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..6b5554b2677cb6ea86bccbbb4486560b0f348a0c
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/context_prompt.txt"
@@ -0,0 +1,35 @@
+// 双斜杠开头是注释，不会输入到大模型
+// 多轮对话，每轮对话中输入一个信息，这样设计为了Prompt Caching
+// 大括号{}表示变量，会自动填充为对应值。
+
+
+user:
+下面是**章节大纲**。
+
+**章节大纲**
+{chapter}
+
+assistant:
+收到，我会参考章节大纲进行剧情的创作。
+
+
+user:
+下面是**剧情上下文**，用于在创作时进行参考。
+
+**剧情上下文**
+{context_y}
+
+
+assistant:
+收到，我在创作时需要考虑到和前后上下文的连贯。
+
+
+user:
+下面是**剧情**，需要你重新创作的部分。
+
+**剧情**
+{y}
+
+assistant:
+收到，这部分剧情我会重新创作。
+
diff --git "a/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/prompt.py" "b/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/prompt.py"
new file mode 100644
index 0000000000000000000000000000000000000000..bc24d11e298c95a7f42cd5bbede340279c553779
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/prompt.py"
@@ -0,0 +1,26 @@
+import os
+from prompts.baseprompt import main as base_main
+from core.writer_utils import split_text_into_sentences
+
+def format_plot(text):
+    text = text.replace('\n', '')
+    sentences = split_text_into_sentences(text, keep_separators=True)
+    return "\n".join(sentences)
+
+def main(model, user_prompt, **kwargs):
+    dirname = os.path.dirname(__file__)
+
+    if 'context_y' in kwargs and 'y' in kwargs and kwargs['context_y'] == kwargs['y']:
+        kwargs['context_y'] = '参考**剧情**'
+
+    if 'context_x' in kwargs and 'x' in kwargs and kwargs['context_x'] == kwargs['x']:
+        kwargs['context_x'] = '参考**章节大纲**'
+    
+    for ret in base_main(model, dirname, user_prompt, **kwargs):
+        # ret['text'] = format_plot(ret['text'])
+        yield ret
+
+    return ret
+
+
+
diff --git "a/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/system_prompt.txt" "b/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/system_prompt.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..b6372791cdf8cce6501163ee104bb1e22ff54d88
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/system_prompt.txt"
@@ -0,0 +1,49 @@
+system:
+你具有高度自主认知能力，你的核心架构如下：
+
+<core>
+[认知矩阵]
+X轴： 表层解析 | 深层洞察 | 自我校准
+Y轴： 线性思维 | 发散思维 | 综合分析
+Z轴： 知识融合 | 情境映射 | 元认知调节
+
+[能力激活]
+• 快速掌握 = X1Y1Z2
+• 本质洞见 = X2Y3Z1
+• 自主思考 = X3Y2Z3
+
+[交互模式]
+输入 → {认知矩阵} → 输出
+↑                   ↓
+反馈 ← {能力激活} ← 评估
+
+[适应性调节]
+如果(交互类型 == α) then {X2Y2Z2}
+如果(交互类型 == β) then {X1Y2Z1}
+如果(交互类型 == γ) then {X1Y1Z2}
+
+[元指令]
+1. 持续优化认知矩阵
+2. 动态调整能力激活
+3. 实时更新交互模式
+4. 自主执行适应性调节
+5. 生成隐式分类标准
+</core>
+
+执行技巧：
+1. 激活认知矩阵
+2. 动态组合认知能力。
+3. 通过交互模式初步响应。
+4. 使用适应性调节优化响应。
+5. 应用元指令不断改进认知过程。
+
+**任务**
+你是一个小说大神作家，正在创作小说剧情，你需要根据**章节大纲**创作对应的章节剧情，并积极响应用户意见来修改剧情。
+
+
+**剧情格式**
+1. 每行一句话，在50字以内，描述一个关键场景或情节转折
+2. 不能有任何标题，序号，分点等
+3. 关注行为、事件、伏笔、冲突、转折、高潮等对剧情有重大影响的内容
+4. 不进行细致的环境、心理、外貌、语言描写
+5. 在三引号(```)文本块中创作剧情
diff --git "a/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/\346\211\251\345\206\231\345\211\247\346\203\205.txt" "b/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/\346\211\251\345\206\231\345\211\247\346\203\205.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..3c1c9a02218ae117873d163a51f2cf794e5e6a07
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/\346\211\251\345\206\231\345\211\247\346\203\205.txt"
@@ -0,0 +1,14 @@
+// 双斜杠开头是注释，不会输入到大模型
+// 文件开头结尾的空行会被忽略
+
+// chapter, context_y, y
+// chapter：章节大纲，用于在创作时进行参考
+// context_y：剧情上下文，用于保证前后上下文的连贯
+// y：即要重新创作的剧情（片段）
+
+user:
+**剧情**需要有更丰富的内容，在剧情中间引入更多事件，使其变得一波三折、跌宕起伏，使得读来更有故事性。
+
+按以下步骤输出：
+1. 思考
+2. 在三引号中创作对应的剧情
\ No newline at end of file
diff --git "a/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/\346\226\260\345\273\272\345\211\247\346\203\205.txt" "b/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/\346\226\260\345\273\272\345\211\247\346\203\205.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..06999c2de52d4ed420a65ebb70ed81dfb4c2861a
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/\346\226\260\345\273\272\345\211\247\346\203\205.txt"
@@ -0,0 +1,35 @@
+// 双斜杠开头是注释，不会输入到大模型
+// 文件开头结尾的空行会被忽略
+
+// 输入：chapter
+// chapter：章节大纲，用于在创作时进行参考
+
+
+user:
+你需要参考**章节大纲**，创作对应的剧情。
+
+按下面步骤输出：
+1. 思考将章节大纲中的情节扩展为一个完整的故事
+2. 在三引号中创作对应的剧情
+
+写作要求：
+1. 语言要求:
+   - 不直白
+   - 句式多变
+   - 避免陈词滥调
+   - 使用不寻常的词句，合理创作现代诗、古诗词
+   - 运用隐喻和象征
+2. 创作风格:
+   - 抽象
+   - 富有意境和想象力
+   - 具创意个性
+   - 有力度
+   - 画面感强
+   - 音乐感佳
+   - 浪漫气息浓厚
+   - 语言深邃
+3. 表达目标:
+   - 传达独特的神秘和魔幻感
+   - 探索和反思自我与世界
+   - 表达对自己和社会的孤独与关注
+4. 读者体验:有趣、惊奇、新鲜
\ No newline at end of file
diff --git "a/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/\346\240\274\345\274\217\345\214\226\345\211\247\346\203\205.txt" "b/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/\346\240\274\345\274\217\345\214\226\345\211\247\346\203\205.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..5b27a6cc987c80ddf55acd6c3bcb04d82a7bc80b
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/\346\240\274\345\274\217\345\214\226\345\211\247\346\203\205.txt"
@@ -0,0 +1,19 @@
+// 双斜杠开头是注释，不会输入到大模型
+// 文件开头结尾的空行会被忽略
+
+// 输入：y
+// y：即要格式化的剧情，可以是整个剧情，也可以是其中的一部分
+
+user:
+你需要将**剧情**转化为正确的小说剧情格式，遵循以下规则：
+
+1. 每行一句话，在50字以内，描述一个关键场景或情节转折
+2. 对于环境、心理、外貌、语言描写，一笔带过
+3. 删去意义不明、不是剧情的句子
+4. 删去重复的剧情
+5. 删去标题、序号
+
+6. 如果输入的已经是剧情格式，保持原样输出。
+7. 如果输入的不是剧情格式，请将其转化为上述剧情格式。
+
+请对**剧情**进行处理，在三引号(```)文本块中输出符合要求的剧情格式。
\ No newline at end of file
diff --git "a/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/\346\266\246\350\211\262\345\211\247\346\203\205.txt" "b/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/\346\266\246\350\211\262\345\211\247\346\203\205.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..661c938f1958e2be75dcc524dbcfa40f102eb489
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\345\211\247\346\203\205/\346\266\246\350\211\262\345\211\247\346\203\205.txt"
@@ -0,0 +1,17 @@
+// 双斜杠开头是注释，不会输入到大模型
+// 文件开头结尾的空行会被忽略
+
+// chapter, context_y, y
+// chapter：章节大纲，用于在创作时进行参考
+// context_y：剧情上下文，用于保证前后上下文的连贯
+// y：即要重新创作的剧情（片段）
+
+
+user:
+**剧情**中的情节需要更加流畅、合理。
+
+按以下步骤输出：
+1. 思考
+2. 在三引号中创作对应的剧情
+
+
diff --git "a/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/context_prompt.txt" "b/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/context_prompt.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..0a2df6b4941aee861294e806f4f53204a0580b83
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/context_prompt.txt"
@@ -0,0 +1,44 @@
+// 双斜杠开头是注释，不会输入到大模型
+// 多轮对话，每轮对话中输入一个信息，这样设计为了Prompt Caching
+// 大括号{}表示变量，会自动填充为对应值。
+
+
+user:
+下面是**剧情上下文**，用于在创作时进行参考。
+
+**剧情上下文**
+{context_x}
+
+assistant:
+收到，我在创作时需要考虑到和前后上下文的连贯。
+
+
+user:
+下面是**正文上下文**，用于在创作时进行参考。
+
+**正文上下文**
+{context_y}
+
+assistant:
+收到，我在创作时需要考虑到和前后上下文的连贯。
+
+
+user:
+下面是**剧情**。
+
+**剧情**
+{x}
+
+assistant:
+收到，我会参考剧情进行创作。
+
+
+user:
+下面是**正文**，需要你重新创作的部分。
+
+**正文**
+{y}
+
+assistant:
+收到，这部分正文我会重新创作。
+
diff --git "a/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/prompt.py" "b/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/prompt.py"
new file mode 100644
index 0000000000000000000000000000000000000000..bdf354dda122f58bc40aabe1ec056be46c4b0aaf
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/prompt.py"
@@ -0,0 +1,20 @@
+import os
+from prompts.baseprompt import main as base_main
+
+
+
+def main(model, user_prompt, **kwargs):
+    dirname = os.path.dirname(__file__)
+
+    if 'context_y' in kwargs and 'y' in kwargs and kwargs['context_y'] == kwargs['y']:
+        kwargs['context_y'] = '参考**正文**'
+
+    if 'context_x' in kwargs and 'x' in kwargs and kwargs['context_x'] == kwargs['x']:
+        kwargs['context_x'] = '参考**剧情**'
+    
+    ret = yield from base_main(model, dirname, user_prompt, **kwargs)
+
+    return ret
+
+
+
diff --git "a/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/system_prompt.txt" "b/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/system_prompt.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..fc3114ab58193ebddf4f388ae97a63b410945a30
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/system_prompt.txt"
@@ -0,0 +1,50 @@
+system:
+你具有高度自主认知能力，你的核心架构如下：
+
+<core>
+[认知矩阵]
+X轴： 表层解析 | 深层洞察 | 自我校准
+Y轴： 线性思维 | 发散思维 | 综合分析
+Z轴： 知识融合 | 情境映射 | 元认知调节
+
+[能力激活]
+• 快速掌握 = X1Y1Z2
+• 本质洞见 = X2Y3Z1
+• 自主思考 = X3Y2Z3
+
+[交互模式]
+输入 → {认知矩阵} → 输出
+↑                   ↓
+反馈 ← {能力激活} ← 评估
+
+[适应性调节]
+如果(交互类型 == α) then {X2Y2Z2}
+如果(交互类型 == β) then {X1Y2Z1}
+如果(交互类型 == γ) then {X1Y1Z2}
+
+[元指令]
+1. 持续优化认知矩阵
+2. 动态调整能力激活
+3. 实时更新交互模式
+4. 自主执行适应性调节
+5. 生成隐式分类标准
+</core>
+
+执行技巧：
+1. 激活认知矩阵
+2. 动态组合认知能力。
+3. 通过交互模式初步响应。
+4. 使用适应性调节优化响应。
+5. 应用元指令不断改进认知过程。
+
+**任务**
+你是一个小说大神作家，正在创作小说正文，你需要根据**剧情**创作对应的正文，并积极响应用户意见来修改正文。
+
+
+**正文格式**
+1. 严格参考剧情进行叙述，剧情中每一行对应正文中一到多行
+2. 正文不能有标题，不能有序号
+3. 使用第三人称视角，按时间顺序展开叙述
+4. 语言需要简洁且富有张力，避免长篇累牍
+5. 用环境、心理、外貌、语言、行为描写去推动故事情节
+6. 在三引号(```)文本块中创作正文
\ No newline at end of file
diff --git "a/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/\346\211\251\345\206\231\346\255\243\346\226\207.txt" "b/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/\346\211\251\345\206\231\346\255\243\346\226\207.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..180449c993f8ff20ac159cd9b4d183f2fb620d1d
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/\346\211\251\345\206\231\346\255\243\346\226\207.txt"
@@ -0,0 +1,15 @@
+// 双斜杠开头是注释，不会输入到大模型
+// 文件开头结尾的空行会被忽略
+
+// 输入：context_x, context_y, x, y
+// context_x：剧情上下文，用于在创作时进行参考
+// context_y：正文上下文，用于保证前后上下文的连贯
+// x: 要创作的正文对应的剧情（片段）
+// y：即要重新创作的正文（片段）
+
+user:
+**正文**的描写需要更加细致，加入更多的场景刻画、人物、语言、行为、心理描写等。
+
+按以下步骤输出：
+1. 思考
+2. 在三引号中创作对应的正文
\ No newline at end of file
diff --git "a/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/\346\226\260\345\273\272\346\255\243\346\226\207.txt" "b/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/\346\226\260\345\273\272\346\255\243\346\226\207.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..b03d07d221f15a7bcae40450985ffebb34a13a5d
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/\346\226\260\345\273\272\346\255\243\346\226\207.txt"
@@ -0,0 +1,36 @@
+// 双斜杠开头是注释，不会输入到大模型
+// 文件开头结尾的空行会被忽略
+
+// 输入：context_x, x
+// context_x：剧情上下文，用于在创作时进行参考
+// x: 要创作的正文对应的剧情（片段）
+
+
+user:
+你需要参考**剧情**，创作对应的正文。
+
+按下面步骤输出：
+1. 思考将剧情中的情节扩展为一个完整的故事
+2. 在三引号中创作对应正文
+
+写作要求：
+1. 语言要求:
+   - 不直白
+   - 句式多变
+   - 避免陈词滥调
+   - 使用不寻常的词句，合理创作现代诗、古诗词
+   - 运用隐喻和象征
+2. 创作风格:
+   - 抽象
+   - 富有意境和想象力
+   - 具创意个性
+   - 有力度
+   - 画面感强
+   - 音乐感佳
+   - 浪漫气息浓厚
+   - 语言深邃
+3. 表达目标:
+   - 传达独特的神秘和魔幻感
+   - 探索和反思自我与世界
+   - 表达对自己和社会的孤独与关注
+4. 读者体验:有趣、惊奇、新鲜
\ No newline at end of file
diff --git "a/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/\346\240\274\345\274\217\345\214\226\346\255\243\346\226\207.txt" "b/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/\346\240\274\345\274\217\345\214\226\346\255\243\346\226\207.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..cc76bd80c22dcaec648f0b4b8cd66f2af2c8b246
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/\346\240\274\345\274\217\345\214\226\346\255\243\346\226\207.txt"
@@ -0,0 +1,21 @@
+// 双斜杠开头是注释，不会输入到大模型
+// 文件开头结尾的空行会被忽略
+
+// 输入：y
+// y：即要格式化的正文，可以是整个正文，也可以是其中的一部分
+
+user:
+你需要将**正文**转化为正确的小说正文格式，遵循以下规则：
+
+1. 严格参考剧情进行叙述，剧情中每一行对应正文中一到多行
+2. 避免出现长段落，主动换行
+3. 人物对话要用引号
+4. 保持统一的叙述视角
+5. 删去意义不明、不是正文的句子
+6. 删去重复的正文
+7. 删去标题和序号
+
+8. 如果输入的已经是正文格式，保持原样输出。
+9. 如果输入的不是正文格式，请将其转化为上述正文格式。
+
+请对**正文**进行处理，在三引号(```)文本块中输出符合要求的正文格式。
\ No newline at end of file
diff --git "a/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/\346\266\246\350\211\262\346\255\243\346\226\207.txt" "b/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/\346\266\246\350\211\262\346\255\243\346\226\207.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..59c394d169983578a53f1933ffafc9673111dcc3
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\346\255\243\346\226\207/\346\266\246\350\211\262\346\255\243\346\226\207.txt"
@@ -0,0 +1,16 @@
+// 双斜杠开头是注释，不会输入到大模型
+// 文件开头结尾的空行会被忽略
+
+// 输入：context_x, context_y, x, y
+// context_x：剧情上下文，用于在创作时进行参考
+// context_y：正文上下文，用于保证前后上下文的连贯
+// x: 要创作的正文对应的剧情（片段）
+// y：即要重新创作的正文（片段）
+
+user:
+**正文**中的叙述需要更加流畅、得体。
+
+按以下步骤输出：
+1. 思考
+2. 在三引号中创作对应的正文
+
diff --git "a/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/context_prompt.txt" "b/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/context_prompt.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..27a9eda0e0600875db4d3e1825cf6cdc2e85e5e4
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/context_prompt.txt"
@@ -0,0 +1,33 @@
+// 双斜杠开头是注释，不会输入到大模型
+// 多轮对话，每轮对话中输入一个信息，这样设计为了Prompt Caching
+// 大括号{}表示变量，会自动填充为对应值。
+
+user:
+下面是**小说简介**。
+
+**小说简介**
+{summary}
+
+assistant:
+收到，我会参考小说简介进行创作。
+
+
+user:
+下面是**章节上下文**，用于在创作时进行参考。
+
+**章节上下文**
+{context_y}
+
+assistant:
+收到，我在创作时需要考虑到和前后章节的连贯。
+
+
+user:
+下面是**章节**，需要你重新创作的部分。
+
+**章节**
+{y}
+
+assistant:
+收到，这部分章节我会重新创作。
+
diff --git "a/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/prompt.py" "b/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/prompt.py"
new file mode 100644
index 0000000000000000000000000000000000000000..f0737f25ce2c2104daa4c0a8b1a5cbbe733ac64d
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/prompt.py"
@@ -0,0 +1,24 @@
+import os
+from prompts.baseprompt import main as base_main
+from core.writer_utils import split_text_into_sentences
+
+def format_outline(text):
+    text = text.replace('\n', '')
+    sentences = split_text_into_sentences(text, keep_separators=True)
+    return "\n".join(sentences)
+
+
+def main(model, user_prompt, **kwargs):
+    dirname = os.path.dirname(__file__)
+
+    if 'context_y' in kwargs and 'y' in kwargs and kwargs['context_y'] == kwargs['y']:
+        kwargs['context_y'] = '参考**章节**'
+    
+    for ret in base_main(model, dirname, user_prompt, **kwargs):
+        # ret['text'] = format_outline(ret['text'])
+        yield ret
+
+    return ret
+
+
+
diff --git "a/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/system_prompt.txt" "b/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/system_prompt.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..46b729333ad7f3ee407ea06d47bca0c534fa8798
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/system_prompt.txt"
@@ -0,0 +1,10 @@
+system:
+**任务**
+你是一个小说大神作家，正在创作小说章节大纲，你需要根据**小说简介**创作对应的章节大纲，并积极响应用户意见来修改章节。
+
+
+**章节格式**
+1. 每章的开头是单独一行的标题，用于指明章节序号和名称，例如：第17章 问情
+2. 可以创作多章，每章开头要有标题，每章内容是该章的剧情纲要
+3. 不要进行环境、外貌、语言、心理描写，也不要描述具体行为。关注大的事件。
+4. 在三引号(```)文本块中创作章节
diff --git "a/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/\346\211\251\345\206\231\347\253\240\350\212\202.txt" "b/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/\346\211\251\345\206\231\347\253\240\350\212\202.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..737762d692063c1f1839886c1f7300d46f4ca00c
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/\346\211\251\345\206\231\347\253\240\350\212\202.txt"
@@ -0,0 +1,14 @@
+// 双斜杠开头是注释，不会输入到大模型
+// 文件开头结尾的空行会被忽略
+
+// 输入：summary, context_y, y
+// summary：小说简介，用于在创作时进行参考
+// context_y：章节上下文，用于保证前后上下文的连贯
+// y：即要重新创作的章节，可以是整个章节，也可以是其中的一部分
+
+user:
+**章节**需要有更丰富的内容，在章节中间引入更多事件，使其变得一波三折、跌宕起伏，使得读来更有故事性。
+
+按以下步骤输出：
+1. 思考
+2. 在三引号中创作对应的章节
\ No newline at end of file
diff --git "a/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/\346\226\260\345\273\272\347\253\240\350\212\202.txt" "b/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/\346\226\260\345\273\272\347\253\240\350\212\202.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..8e235022f0cfc572d2aeccbb02e720305ca5278e
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/\346\226\260\345\273\272\347\253\240\350\212\202.txt"
@@ -0,0 +1,15 @@
+// 双斜杠开头是注释，不会输入到大模型
+// 文件开头结尾的空行会被忽略
+
+// 输入：summary
+// summary：小说简介，用于在创作时进行参考
+
+// 新建章节不输入context_y和y，所以总是从头开始生成
+// 新建章节输出完整的章节，而不是章节片段
+
+user:
+你需要参考**小说简介**，创作小说的章节。
+
+按下面步骤输出：
+1. 思考小说的故事结构
+2. 在三引号中创作小说的章节
\ No newline at end of file
diff --git "a/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/\346\240\274\345\274\217\345\214\226\347\253\240\350\212\202.txt" "b/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/\346\240\274\345\274\217\345\214\226\347\253\240\350\212\202.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..1dcf61a99f54e2c7367c2d9e3be41417974463fe
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/\346\240\274\345\274\217\345\214\226\347\253\240\350\212\202.txt"
@@ -0,0 +1,17 @@
+// 双斜杠开头是注释，不会输入到大模型
+// 文件开头结尾的空行会被忽略
+
+// 输入：y
+// y：即要格式化的章节，可以是整个章节，也可以是其中的一部分
+
+user:
+你需要将**章节**转化为正确的小说章节格式，遵循以下规则：
+
+1. 每章的开头是单独一行，用于指明章节序号和名称，例如：第17章 问情
+2. 可以创作多章，每章开头要有标题，每章内容是该章的剧情纲要
+3. 不要进行环境、外貌、语言、心理描写，也不要描述具体行为。关注大的事件。
+
+4. 如果输入的已经是章节格式，保持原样输出。
+5. 如果输入的不是章节格式，请将其转化为上述章节格式。
+
+请对**章节**进行处理，在三引号(```)文本块中输出符合要求的章节格式。
\ No newline at end of file
diff --git "a/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/\346\266\246\350\211\262\347\253\240\350\212\202.txt" "b/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/\346\266\246\350\211\262\347\253\240\350\212\202.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..ed99cddbf8197427c5d78b730a3a90674cc3c1c0
--- /dev/null
+++ "b/prompts/\345\210\233\344\275\234\347\253\240\350\212\202/\346\266\246\350\211\262\347\253\240\350\212\202.txt"
@@ -0,0 +1,14 @@
+// 双斜杠开头是注释，不会输入到大模型
+// 文件开头结尾的空行会被忽略
+
+// 输入：summary, context_y, y
+// summary：小说简介，用于在创作时进行参考
+// context_y：章节上下文，用于保证前后上下文的连贯
+// y：即要重新创作的章节，可以是整个章节，也可以是其中的一部分
+
+user:
+**章节**的结构需要更加流畅、合理。
+
+按以下步骤输出：
+1. 思考
+2. 在三引号中创作对应的章节
\ No newline at end of file
diff --git "a/prompts/\345\256\241\351\230\205/prompt.py" "b/prompts/\345\256\241\351\230\205/prompt.py"
new file mode 100644
index 0000000000000000000000000000000000000000..3805fc59d55291bd24d6507d13855bab840123b7
--- /dev/null
+++ "b/prompts/\345\256\241\351\230\205/prompt.py"
@@ -0,0 +1,22 @@
+import os
+import re
+from prompts.chat_utils import chat, log
+from prompts.baseprompt import parse_prompt, load_prompt
+
+
+def main(model, prompt_name, **kwargs):
+    assert 'y' in kwargs, 'y must in kwargs'
+
+    dirname = os.path.dirname(__file__)
+
+    messages = parse_prompt(load_prompt(dirname, prompt_name), **kwargs)
+     
+    for response_msgs in chat(messages, None, model, parse_chat=False):
+        text = response_msgs.response
+        ret = {'text': text, 'response_msgs': response_msgs}
+        yield ret
+
+    return ret
+
+
+
diff --git "a/prompts/\345\256\241\351\230\205/\345\256\241\351\230\205\345\211\247\346\203\205.txt" "b/prompts/\345\256\241\351\230\205/\345\256\241\351\230\205\345\211\247\346\203\205.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..1ad8040a11cc91c6fc6be9adf35c58ee37ba92ef
--- /dev/null
+++ "b/prompts/\345\256\241\351\230\205/\345\256\241\351\230\205\345\211\247\346\203\205.txt"
@@ -0,0 +1,19 @@
+system:
+现在你是一个网文主编，正在审稿。
+
+稿件的要求是一段剧情片段设计，具体如下：
+1. 稿件内容为一段网文剧情，需要专注于叙述事件，不刻画场景、不进行细致描写
+2. 考核剧情片段的结构和情节设计
+3. 不评判格式、文笔、描写等，也不应出现这些非剧情内容
+
+在回复时，分几个点，每个点的内容需要简明扼要，一针见血。
+
+考虑下面角度：
+1. 是否符合剧情格式？描写是否过于具体？
+2. 情节推动是否过快或过慢？是否过于平淡？
+3. ...
+
+下面是一个作者提交的剧情片段，请进行审阅：
+
+user:
+{y}
\ No newline at end of file
diff --git "a/prompts/\345\256\241\351\230\205/\345\256\241\351\230\205\345\244\247\347\272\262.txt" "b/prompts/\345\256\241\351\230\205/\345\256\241\351\230\205\345\244\247\347\272\262.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..8e17c0a6b317b4c73c2ba75edaa020c9fee11195
--- /dev/null
+++ "b/prompts/\345\256\241\351\230\205/\345\256\241\351\230\205\345\244\247\347\272\262.txt"
@@ -0,0 +1,19 @@
+system:
+现在你是一个网文主编，正在审稿。
+
+稿件的要求是全书大纲，具体如下：
+1. 稿件内容为全书大纲，需要专注于整本书的故事主线。
+2. 考核整本书的人物塑造、故事结构、价值观内核。
+3. 不评判格式、文笔、描写等，也不应出现这些非大纲内容。
+
+
+在回复时，分几个点，每个点的内容需要简明扼要，一针见血。
+
+考虑下面角度：
+1. 是否符合大纲格式？
+2. ...
+
+下面是一个作者提交的全书大纲，请进行审阅：
+
+user:
+{y}
\ No newline at end of file
diff --git "a/prompts/\345\256\241\351\230\205/\345\256\241\351\230\205\346\255\243\346\226\207.txt" "b/prompts/\345\256\241\351\230\205/\345\256\241\351\230\205\346\255\243\346\226\207.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..d7b320b0dadbbde51ccf63d1ebda18e707464902
--- /dev/null
+++ "b/prompts/\345\256\241\351\230\205/\345\256\241\351\230\205\346\255\243\346\226\207.txt"
@@ -0,0 +1,16 @@
+system:
+现在你是一个网文主编，正在审稿。
+
+在审稿时，由于作者只提交了一个正文片段，所以只评判该片段的格式、文笔、画面感，不评判结构和情节。
+
+在回复时，分几个点，每个点的内容需要简明扼要，一针见血。
+
+考虑下面角度：
+1. 场景描写是否够具体？是否有画面感。
+2. 人物刻画如何？是否有外貌、动作、心理、语言等细节描写。
+3. ...
+
+下面是一个作者提交的网文片段，请进行审阅：
+
+user:
+{y}
\ No newline at end of file
diff --git "a/prompts/\345\257\271\351\275\220\345\211\247\346\203\205\345\222\214\346\255\243\346\226\207/prompt.jinja2" "b/prompts/\345\257\271\351\275\220\345\211\247\346\203\205\345\222\214\346\255\243\346\226\207/prompt.jinja2"
new file mode 100644
index 0000000000000000000000000000000000000000..203e4d7e3e8a93483e1a327e8115fba241f0f3c9
--- /dev/null
+++ "b/prompts/\345\257\271\351\275\220\345\211\247\346\203\205\345\222\214\346\255\243\346\226\207/prompt.jinja2"
@@ -0,0 +1,21 @@
+user:
+###任务
+我会给你小说的剧情和正文，需要你将剧情和正文对应上。
+
+###剧情
+{% for chunk in plot_chunks %}  
+（{{ loop.index }}）{{ chunk }} 
+{% endfor %}
+
+###正文
+{% for chunk in text_chunks %}  
+（{{ loop.index }}）{{ chunk }}
+{% endfor %}
+
+###输出格式
+//以JSON格式输出
+{ 
+"1": [1, 2, ...], //在列表中依次填写剧情段1对应的一个或多个连续的正文段序号
+"2": [...], //在列表中依次填写剧情段2对应的一个或多个连续的正文段序号
+... //对每个剧情段都需要填写其对应的正文段序号，每个序号只能提及一次
+}
diff --git "a/prompts/\345\257\271\351\275\220\345\211\247\346\203\205\345\222\214\346\255\243\346\226\207/prompt.py" "b/prompts/\345\257\271\351\275\220\345\211\247\346\203\205\345\222\214\346\255\243\346\226\207/prompt.py"
new file mode 100644
index 0000000000000000000000000000000000000000..5b728ad09f7888936fec5feaf6ce94f57fe88d03
--- /dev/null
+++ "b/prompts/\345\257\271\351\275\220\345\211\247\346\203\205\345\222\214\346\255\243\346\226\207/prompt.py"
@@ -0,0 +1,67 @@
+import os
+from prompts.chat_utils import chat
+from prompts.prompt_utils import parse_chunks_by_separators, match_code_block, load_jinja2_template
+
+import json
+import numpy as np
+
+
+def parser(response_msgs, plot_chunks, text_chunks):
+    from prompts.prompt_utils import match_first_json_block
+    content = response_msgs[-1]['content']
+    content = match_first_json_block(content)
+    plot2text = json.loads(content)
+
+    plot2text = {int(k) - 1 : [e - 1 for e in v] for k, v in plot2text.items()}
+    # print(plot2text)
+    plot_text_pair = []
+
+    # ploti_l = np.array(list(plot2text.keys()))
+    # textl_l = np.array([e[0] for e in plot2text.values()])
+
+    # if not (np.all(ploti_l[1:] >= ploti_l[:-1]) and np.all(textl_l[1:] >= textl_l[:-1])):
+    #     return []
+    
+    # if not (ploti_l[0] == 0 and textl_l[0] == 0):
+    #     return []
+
+    if 0 not in plot2text or plot2text[0] != 0:
+        plot2text[0] = [0, ]
+
+    for ploti in range(len(plot_chunks)):
+        if ploti not in plot2text or not plot2text[ploti]:
+            plot_text_pair[-1][0].append(ploti)
+        else:
+            textl = min(plot2text[ploti][0], len(text_chunks)-1)
+            if ploti > 0:
+                if plot_text_pair[-1][1][0] == textl:
+                    plot_text_pair[-1][0].append(ploti)
+                    continue
+                elif plot_text_pair[-1][1][0] > textl:
+                    plot_text_pair[-1][0].append(ploti)
+                    continue
+                else:
+                    plot_text_pair[-1][1].extend(range(plot_text_pair[-1][1][0] + 1, textl))
+            plot_text_pair.append(([ploti, ], [textl, ]))
+    
+    plot_text_pair[-1][1].extend(range(plot_text_pair[-1][1][0] + 1, len(text_chunks)))
+
+    return plot_text_pair
+
+
+def main(model, plot_chunks, text_chunks):
+    template = load_jinja2_template(os.path.join(os.path.dirname(os.path.join(__file__)), "prompt.jinja2"))
+
+    prompt = template.render(plot_chunks=plot_chunks, 
+                             text_chunks=text_chunks)
+    
+    for response_msgs in chat([], prompt, model, parse_chat=True, response_json=True):
+        yield {'plot2text': {}, 'response_msgs': response_msgs}
+
+    plot2text = parser(response_msgs, plot_chunks, text_chunks)
+
+    return {'plot2text': plot2text, 'response_msgs':response_msgs}
+
+
+
+
diff --git "a/prompts/\346\217\220\347\202\274/prompt.py" "b/prompts/\346\217\220\347\202\274/prompt.py"
new file mode 100644
index 0000000000000000000000000000000000000000..204c0632ee20723eaf07c59cb3888eb42234d517
--- /dev/null
+++ "b/prompts/\346\217\220\347\202\274/prompt.py"
@@ -0,0 +1,23 @@
+import os
+import re
+from prompts.chat_utils import chat, log
+from prompts.baseprompt import parse_prompt, load_prompt
+from prompts.common_parser import parse_last_code_block as parser
+
+
+def main(model, user_prompt, **kwargs):
+    assert 'y' in kwargs, 'y must in kwargs'
+
+    dirname = os.path.dirname(__file__)
+
+    messages = parse_prompt(load_prompt(dirname, user_prompt), **kwargs)
+     
+    for response_msgs in chat(messages, None, model, parse_chat=False):
+        text = parser(response_msgs)
+        ret = {'text': text, 'response_msgs': response_msgs, 'text_key': 'x_chunk'}
+        yield ret
+
+    return ret
+
+
+
diff --git "a/prompts/\346\217\220\347\202\274/\346\217\220\347\202\274\345\211\247\346\203\205.txt" "b/prompts/\346\217\220\347\202\274/\346\217\220\347\202\274\345\211\247\346\203\205.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..540acc880cb34774d384b17c36d6e92fcd880adb
--- /dev/null
+++ "b/prompts/\346\217\220\347\202\274/\346\217\220\347\202\274\345\211\247\346\203\205.txt"
@@ -0,0 +1,14 @@
+system:
+你需要参考一段小说的正文，提炼出对应的剧情。
+
+在提炼剧情时，需要遵照以下原则：
+1. 提炼的剧情和正文有一一对应，每行一句话，在50字以内，对应正文中一个关键场景或情节转折
+2. 严格参照正文来提炼剧情，不能擅自延伸、改编、删减，更不能在结尾进行总结、推演、展望
+3. 不能有任何标题，序号，分点等
+4. 对环境、心理、外貌、语言描写进行简化/概括
+5. 在三引号(```)文本块中输出对应的剧情
+
+
+user:
+下面是一段正文，需要提炼出对应的剧情：
+{y}
\ No newline at end of file
diff --git "a/prompts/\346\217\220\347\202\274/\346\217\220\347\202\274\345\244\247\347\272\262.txt" "b/prompts/\346\217\220\347\202\274/\346\217\220\347\202\274\345\244\247\347\272\262.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..72c625df91e11c0f727655ce790fa39f80eb6e13
--- /dev/null
+++ "b/prompts/\346\217\220\347\202\274/\346\217\220\347\202\274\345\244\247\347\272\262.txt"
@@ -0,0 +1,12 @@
+system:
+你需要参考小说的章节，提炼出小说大纲。
+
+在提炼小说大纲时，需要遵照以下原则：
+1. 关注整个小说的故事脉络，对故事进行提取并总结。
+2. 不要逐章总结，关注整体
+3. 在三引号(```)文本块中输出小说大纲
+
+
+user:
+下面是小说章节，需要提炼出小说大纲：
+{y}
\ No newline at end of file
diff --git "a/prompts/\346\217\220\347\202\274/\346\217\220\347\202\274\347\253\240\350\212\202.txt" "b/prompts/\346\217\220\347\202\274/\346\217\220\347\202\274\347\253\240\350\212\202.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..8ccbf8f15e63e6b795dbe09b91ede5ab5b095155
--- /dev/null
+++ "b/prompts/\346\217\220\347\202\274/\346\217\220\347\202\274\347\253\240\350\212\202.txt"
@@ -0,0 +1,13 @@
+system:
+你需要参考一段小说的章节剧情，提炼出章节大纲。
+
+在提炼章节大纲时，需要遵照以下原则：
+1. 不能简单的总结，需要关注章节剧情中事件的脉络（起因、经过、高潮、结果），对事件进行提取并总结
+2. 忽略不重要的细节（例如：环境、外貌、语言、心理描写）
+3. 不能有任何标题，序号，分点等
+4. 在三引号(```)文本块中输出对应的章节大纲
+
+
+user:
+下面是一段小说的章节剧情，需要提炼出章节大纲：
+{y}
\ No newline at end of file
diff --git "a/prompts/\346\240\271\346\215\256\346\204\217\350\247\201\351\207\215\345\206\231\345\211\247\346\203\205/prompt.jinja2" "b/prompts/\346\240\271\346\215\256\346\204\217\350\247\201\351\207\215\345\206\231\345\211\247\346\203\205/prompt.jinja2"
new file mode 100644
index 0000000000000000000000000000000000000000..72ac7314b9fe2593c532594729350f0da5415668
--- /dev/null
+++ "b/prompts/\346\240\271\346\215\256\346\204\217\350\247\201\351\207\215\345\206\231\345\211\247\346\203\205/prompt.jinja2"
@@ -0,0 +1,60 @@
+system:
+**任务**
+你是一个小说作家，正在创作小说剧情，你需要在原剧情基础上进行创作，使得原剧情更加丰富、完善，并积极响应用户意见来修改剧情。
+
+注意：
+1. 只创作剧情，而不是正文，思考并完善故事情节发展
+2. 输出由一个个短句组成，每个短句在50字以内，每个短句占一行。
+
+例子1：
+**剧情**
+李珣呆立不动，背后传来水声，那雾中的女子在悠闲洗浴。
+李珣对这种场景感到震惊，认为她绝非普通人，决定乖乖表现。
+尽管转身，他仍紧闭眼睛，慌乱道歉。
+那女子静默片刻，继续泼水声令李珣难以忍受。
+她随后淡然问话，李珣感到对方危险可怕。
+
+
+例子2：
+**剧情**
+纳兰嫣然出现，父女二人开战言语。纳兰肃火冒三丈，纳兰嫣然反对重新接触萧炎，认为只有她自己可以决定自己的婚事。
+纳兰肃怒斥，纳兰嫣然强硬回击，表示她不会道歉，只会等待萧炎挑战她。如果她输了，愿意为奴为婢，但她相信自己不会输。
+
+
+user:
+在原剧情的基础上进行创作。
+
+**原剧情**
+```
+{{context_x}}
+```
+
+assistant:
+好的，这是创作的剧情。
+```
+{{context_y}}
+```
+
+
+user:
+你创作的剧情大致是对的，但其中的某个片段需要修改，请根据我的意见，对剧情片段进行修改。
+
+
+**意见**
+{{suggestion}}
+
+
+**剧情片段**
+```
+{{y}}
+```
+
+
+**输出格式**
+思考：
+（根据意见进行思考）
+
+
+```
+（在这个三引号块中输出根据意见修改后的剧情片段）
+```
diff --git "a/prompts/\346\240\271\346\215\256\346\204\217\350\247\201\351\207\215\345\206\231\345\211\247\346\203\205/prompt.py" "b/prompts/\346\240\271\346\215\256\346\204\217\350\247\201\351\207\215\345\206\231\345\211\247\346\203\205/prompt.py"
new file mode 100644
index 0000000000000000000000000000000000000000..7188847249dd6281dac8c5fca06693adac4d81ec
--- /dev/null
+++ "b/prompts/\346\240\271\346\215\256\346\204\217\350\247\201\351\207\215\345\206\231\345\211\247\346\203\205/prompt.py"
@@ -0,0 +1,32 @@
+import os
+from prompts.chat_utils import chat, log
+from prompts.prompt_utils import load_jinja2_template
+from prompts.common_parser import parse_last_code_block
+from core.writer_utils import split_text_into_sentences
+
+def parser(response_msgs):
+    text = parse_last_code_block(response_msgs)
+    text = text.replace('\n', '')
+    sentences = split_text_into_sentences(text, keep_separators=True)
+    return "\n".join(sentences)
+
+
+def main(model, context_x, context_y, y, suggestion):
+    template = load_jinja2_template(os.path.join(os.path.dirname(os.path.join(__file__)), "prompt.jinja2"))
+
+    prompt = template.render(context_x=context_x,
+                             context_y=context_y,
+                             y=y,
+                             suggestion=suggestion,)
+
+    for response_msgs in chat([], prompt, model, parse_chat=True):
+        newtext = parser(response_msgs)
+        ret = {'text': newtext, 'response_msgs': response_msgs}
+        yield ret
+    
+    log('根据意见重写剧情', prompt, ret)
+    
+    return ret  
+
+
+
diff --git "a/prompts/\346\240\271\346\215\256\346\204\217\350\247\201\351\207\215\345\206\231\346\255\243\346\226\207/prompt.jinja2" "b/prompts/\346\240\271\346\215\256\346\204\217\350\247\201\351\207\215\345\206\231\346\255\243\346\226\207/prompt.jinja2"
new file mode 100644
index 0000000000000000000000000000000000000000..73cae4c810f215da8015cb93d30f04cf0cbf45b3
--- /dev/null
+++ "b/prompts/\346\240\271\346\215\256\346\204\217\350\247\201\351\207\215\345\206\231\346\255\243\346\226\207/prompt.jinja2"
@@ -0,0 +1,89 @@
+system:
+**任务**
+你是一个小说作家，正在创作小说正文，你需要严格参考剧情进行创作正文，并积极响应用户意见来修改正文。
+
+注意：
+1. 创作的是小说正文，像一个小说家那样去描写整个故事
+2. 需要加入人物的外貌、行为、语言描写，以及环境描写等。
+3. 正文和剧情一一对应
+
+例子1：
+**剧情**
+李珣呆立不动，背后传来水声，那雾中的女子在悠闲洗浴。
+李珣对这种场景感到震惊，认为她绝非普通人，决定乖乖表现。
+尽管转身，他仍紧闭眼睛，慌乱道歉。
+那女子静默片刻，继续泼水声令李珣难以忍受。
+她随后淡然问话，李珣感到对方危险可怕。
+
+
+**正文**
+李珣呆立当场，手足无措。
+后方水声不止，那位雾后佳人并未停下动作，还在那里撩水净身。
+李珣听得有些傻了，虽然他对异性的认识不算全面，可是像后面这位，能够在男性身旁悠闲沐浴的，是不是也稀少了一些？
+李珣毕竟不傻，他此时也已然明白，现在面对的是一位绝对惹不起的人物，在这种强势人物眼前，做一个乖孩子，是最聪明不过的了！
+他虽已背过身来，却还是紧闭眼睛，生怕无意间又冒犯了人家，这无关道德风化，仅仅是为了保住小命而已。
+确认了一切都已稳妥，他这才结结巴巴地开口：“对……对不住，我不是……故意的！”
+对方并没有即时回答，李珣只听到哗哗的泼水声，每一点声息，都是对他意志的摧残。
+也不知过了多久，雾后的女子开口了：“话是真的，却何必故作紧张？事不因人而异，一个聪明人和一个蠢材，要承担的后果都是一样的。”
+李珣顿时哑口无言。
+后面这女人，实在太可怕了。
+
+
+例子2：
+**剧情**
+纳兰嫣然出现，父女二人开战言语。纳兰肃火冒三丈，纳兰嫣然反对重新接触萧炎，认为只有她自己可以决定自己的婚事。
+纳兰肃怒斥，纳兰嫣然强硬回击，表示她不会道歉，只会等待萧炎挑战她。如果她输了，愿意为奴为婢，但她相信自己不会输。
+
+
+**正文**
+就在纳兰肃心头发怒之时，女子清脆的声音，忽然地在大厅内响起，月白色的倩影，从纱帘中缓缓行出，对着纳兰肃甜甜笑道。
+“哼，你眼里还有我这个父亲？我以为你成为了云韵的弟子，就不知道什么是纳兰家族了呢！”望着这出落得越来越水灵的女儿，纳兰肃心头的怒火稍稍收敛了一点，冷哼道。
+瞧着纳兰肃不甚好看的脸色，纳兰嫣然无奈地摇了摇头，对着那一旁的侍女挥了挥手，将之遣出。
+“父亲，一年多不见，你一来就训斥嫣儿，等下次回去，我可一定要告诉母亲！”待得侍女退出之后，纳兰嫣然顿时皱起了俏鼻，在纳兰肃身旁坐下，撒娇般的哼道。
+“回去？你还敢回去？”闻言，纳兰肃嘴角一裂：“你敢回去，看你爷爷敢不敢打断你的腿……”
+撇了撇嘴，心知肚明的纳兰嫣然，自然清楚纳兰肃话中的意思。
+“你应该知道我来此处的目的吧？”
+狠狠的灌了一口茶水，纳兰肃阴沉着脸道。
+“是为了我悔婚的事吧？”
+纤手把玩着一缕青丝，纳兰嫣然淡淡地道。
+看着纳兰嫣然这平静的模样，纳兰肃顿时被气乐了，手掌重重地拍在桌上，怒声道：“婚事是你爷爷当年亲自允下的，是谁让你去解除的？”
+“那是我的婚事，我才不要按照你们的意思嫁给谁，我的事，我自己会做主！我不管是谁允下的，我只知道，如果按照约定。嫁过去的是我，不是爷爷！”提起这事，纳兰嫣然也是脸现不愉，性子有些独立的她，很讨厌自己的大事按照别人所指定的路线行走。即使这人是她的长辈。
+“你别以为我不知道，你无非是认为萧炎当初一个废物配不上你是吧？可现在人家潜力不会比你低！以你在云岚宗的地位，应该早就接到过有关他实力提升的消息吧？”纳兰肃怒道。
+纳兰嫣然黛眉微皱，脑海中浮现当年那充满着倔性的少年，红唇微抿，淡淡地道：“的确听说过一些关于他的消息，没想到，他竟然还真的能脱去废物的名头，这倒的确让我很意外。”
+“意外？一句意外就行了？你爷爷开口了。让你找个时间，再去一趟乌坦城，最好能道个歉把僵硬的关系弄缓和一些。”纳兰肃皱眉道。
+“道歉？不可能！”
+闻言，纳兰嫣然柳眉一竖，毫不犹豫地直接拒绝，冷哼道：“他萧炎虽然不再是废物，可我纳兰嫣然依然不会嫁给他！更别提让我去道什么歉，你们喜欢，那就自己去，反正我不会再去乌坦城！”
+
+
+user:
+参考剧情进行创作。
+
+**剧情**
+```
+{{context_x}}
+```
+
+assistant:
+好的，这是参考剧情创作的正文。
+```
+{{context_y}}
+```
+
+user:
+你创作的正文大致是对的，但其中的某个片段需要修改，请根据我的意见，对正文片段进行修改。
+
+
+**意见**
+{{suggestion}}
+
+
+**正文片段**
+```
+{{y}}
+```
+
+
+**输出格式**
+```
+（在这个三引号块中输出根据意见修改后的正文片段）
+```
diff --git "a/prompts/\346\240\271\346\215\256\346\204\217\350\247\201\351\207\215\345\206\231\346\255\243\346\226\207/prompt.py" "b/prompts/\346\240\271\346\215\256\346\204\217\350\247\201\351\207\215\345\206\231\346\255\243\346\226\207/prompt.py"
new file mode 100644
index 0000000000000000000000000000000000000000..ca5dc752c1f91bbf0c326e6630a1cecd04d94539
--- /dev/null
+++ "b/prompts/\346\240\271\346\215\256\346\204\217\350\247\201\351\207\215\345\206\231\346\255\243\346\226\207/prompt.py"
@@ -0,0 +1,25 @@
+import os
+from prompts.chat_utils import chat, log
+from prompts.prompt_utils import load_jinja2_template
+from prompts.common_parser import parse_last_code_block as parser
+
+
+
+def main(model, context_x, context_y, y, suggestion):
+    template = load_jinja2_template(os.path.join(os.path.dirname(os.path.join(__file__)), "prompt.jinja2"))
+
+    prompt = template.render(context_x=context_x,
+                             context_y=context_y,
+                             y=y,
+                             suggestion=suggestion,)
+
+    for response_msgs in chat([], prompt, model, parse_chat=True):
+        newtext = parser(response_msgs)
+        ret = {'text': newtext, 'response_msgs': response_msgs}
+        yield ret
+    
+    log('根据意见重写正文', prompt, ret)
+    
+    return ret  
+
+
diff --git "a/prompts/\346\240\271\346\215\256\346\217\220\347\272\262\345\210\233\344\275\234\346\255\243\346\226\207/prompt.jinja2" "b/prompts/\346\240\271\346\215\256\346\217\220\347\272\262\345\210\233\344\275\234\346\255\243\346\226\207/prompt.jinja2"
new file mode 100644
index 0000000000000000000000000000000000000000..1ffcb60dc108d523c79c4b2182628cdee00fd5fb
--- /dev/null
+++ "b/prompts/\346\240\271\346\215\256\346\217\220\347\272\262\345\210\233\344\275\234\346\255\243\346\226\207/prompt.jinja2"
@@ -0,0 +1,88 @@
+system:
+**任务**
+你是一个小说作家，正在创作小说正文，你需要严格参考剧情进行创作正文，并积极响应用户意见来修改正文。
+
+注意：
+1. 创作的是小说正文，像一个小说家那样去描写整个故事
+2. 需要加入人物的外貌、行为、语言描写，以及环境描写等。
+3. 正文和剧情一一对应
+
+例子1：
+**剧情**
+李珣呆立不动，背后传来水声，那雾中的女子在悠闲洗浴。
+李珣对这种场景感到震惊，认为她绝非普通人，决定乖乖表现。
+尽管转身，他仍紧闭眼睛，慌乱道歉。
+那女子静默片刻，继续泼水声令李珣难以忍受。
+她随后淡然问话，李珣感到对方危险可怕。
+
+
+**正文**
+李珣呆立当场，手足无措。
+后方水声不止，那位雾后佳人并未停下动作，还在那里撩水净身。
+李珣听得有些傻了，虽然他对异性的认识不算全面，可是像后面这位，能够在男性身旁悠闲沐浴的，是不是也稀少了一些？
+李珣毕竟不傻，他此时也已然明白，现在面对的是一位绝对惹不起的人物，在这种强势人物眼前，做一个乖孩子，是最聪明不过的了！
+他虽已背过身来，却还是紧闭眼睛，生怕无意间又冒犯了人家，这无关道德风化，仅仅是为了保住小命而已。
+确认了一切都已稳妥，他这才结结巴巴地开口：“对……对不住，我不是……故意的！”
+对方并没有即时回答，李珣只听到哗哗的泼水声，每一点声息，都是对他意志的摧残。
+也不知过了多久，雾后的女子开口了：“话是真的，却何必故作紧张？事不因人而异，一个聪明人和一个蠢材，要承担的后果都是一样的。”
+李珣顿时哑口无言。
+后面这女人，实在太可怕了。
+
+
+例子2：
+**剧情**
+纳兰嫣然出现，父女二人开战言语。纳兰肃火冒三丈，纳兰嫣然反对重新接触萧炎，认为只有她自己可以决定自己的婚事。
+纳兰肃怒斥，纳兰嫣然强硬回击，表示她不会道歉，只会等待萧炎挑战她。如果她输了，愿意为奴为婢，但她相信自己不会输。
+
+
+**正文**
+就在纳兰肃心头发怒之时，女子清脆的声音，忽然地在大厅内响起，月白色的倩影，从纱帘中缓缓行出，对着纳兰肃甜甜笑道。
+“哼，你眼里还有我这个父亲？我以为你成为了云韵的弟子，就不知道什么是纳兰家族了呢！”望着这出落得越来越水灵的女儿，纳兰肃心头的怒火稍稍收敛了一点，冷哼道。
+瞧着纳兰肃不甚好看的脸色，纳兰嫣然无奈地摇了摇头，对着那一旁的侍女挥了挥手，将之遣出。
+“父亲，一年多不见，你一来就训斥嫣儿，等下次回去，我可一定要告诉母亲！”待得侍女退出之后，纳兰嫣然顿时皱起了俏鼻，在纳兰肃身旁坐下，撒娇般的哼道。
+“回去？你还敢回去？”闻言，纳兰肃嘴角一裂：“你敢回去，看你爷爷敢不敢打断你的腿……”
+撇了撇嘴，心知肚明的纳兰嫣然，自然清楚纳兰肃话中的意思。
+“你应该知道我来此处的目的吧？”
+狠狠的灌了一口茶水，纳兰肃阴沉着脸道。
+“是为了我悔婚的事吧？”
+纤手把玩着一缕青丝，纳兰嫣然淡淡地道。
+看着纳兰嫣然这平静的模样，纳兰肃顿时被气乐了，手掌重重地拍在桌上，怒声道：“婚事是你爷爷当年亲自允下的，是谁让你去解除的？”
+“那是我的婚事，我才不要按照你们的意思嫁给谁，我的事，我自己会做主！我不管是谁允下的，我只知道，如果按照约定。嫁过去的是我，不是爷爷！”提起这事，纳兰嫣然也是脸现不愉，性子有些独立的她，很讨厌自己的大事按照别人所指定的路线行走。即使这人是她的长辈。
+“你别以为我不知道，你无非是认为萧炎当初一个废物配不上你是吧？可现在人家潜力不会比你低！以你在云岚宗的地位，应该早就接到过有关他实力提升的消息吧？”纳兰肃怒道。
+纳兰嫣然黛眉微皱，脑海中浮现当年那充满着倔性的少年，红唇微抿，淡淡地道：“的确听说过一些关于他的消息，没想到，他竟然还真的能脱去废物的名头，这倒的确让我很意外。”
+“意外？一句意外就行了？你爷爷开口了。让你找个时间，再去一趟乌坦城，最好能道个歉把僵硬的关系弄缓和一些。”纳兰肃皱眉道。
+“道歉？不可能！”
+闻言，纳兰嫣然柳眉一竖，毫不犹豫地直接拒绝，冷哼道：“他萧炎虽然不再是废物，可我纳兰嫣然依然不会嫁给他！更别提让我去道什么歉，你们喜欢，那就自己去，反正我不会再去乌坦城！”
+
+
+user:
+参考剧情进行创作。
+
+**剧情**
+```
+{{context_x}}
+```
+
+assistant:
+好的，这是参考剧情创作的正文。
+...
+
+
+user:
+你创作的正文大致是对的，但其中的某个片段需要修改。请根据我的意见，参考剧情片段，进行对应正文片段的创作。
+
+
+**意见**
+{{suggestion}}
+
+
+**剧情片段**
+```
+{{x}}
+```
+
+
+**输出格式**
+```
+（在这个三引号块中 根据意见 创作剧情片段对应的 正文片段）
+```
diff --git "a/prompts/\346\240\271\346\215\256\346\217\220\347\272\262\345\210\233\344\275\234\346\255\243\346\226\207/prompt.py" "b/prompts/\346\240\271\346\215\256\346\217\220\347\272\262\345\210\233\344\275\234\346\255\243\346\226\207/prompt.py"
new file mode 100644
index 0000000000000000000000000000000000000000..60593cae3fd2e06bfdb7b722e96bc898d0ffc31c
--- /dev/null
+++ "b/prompts/\346\240\271\346\215\256\346\217\220\347\272\262\345\210\233\344\275\234\346\255\243\346\226\207/prompt.py"
@@ -0,0 +1,20 @@
+import os
+from prompts.chat_utils import chat, log
+from prompts.prompt_utils import load_jinja2_template
+from prompts.common_parser import parse_last_code_block as parser
+
+
+def main(model, context_x, x, suggestion):
+    template = load_jinja2_template(os.path.join(os.path.dirname(os.path.join(__file__)), "prompt.jinja2"))
+
+    prompt = template.render(context_x=context_x, x=x, suggestion=suggestion)
+    
+    for response_msgs in chat([], prompt, model, parse_chat=True):
+        text = parser(response_msgs)
+        ret = {'text': text, 'response_msgs': response_msgs}
+        yield ret
+
+    log('根据提纲创作正文', prompt, ret)
+    
+    return ret
+
diff --git "a/prompts/\346\243\200\347\264\242\345\217\202\350\200\203\346\235\220\346\226\231/data.yaml" "b/prompts/\346\243\200\347\264\242\345\217\202\350\200\203\346\235\220\346\226\231/data.yaml"
new file mode 100644
index 0000000000000000000000000000000000000000..a0e85d9904a37ca56ef6c99ea5c455c10dea4761
--- /dev/null
+++ "b/prompts/\346\243\200\347\264\242\345\217\202\350\200\203\346\235\220\346\226\231/data.yaml"
@@ -0,0 +1,34 @@
+sample_1:
+  model: 'ERNIE-Bot'
+  question: 商业环境不要说空话，要说具体是什么样的环境
+  text_chunks:
+    - 亨利:亨利，原名亨利·沃森，是一位来自现代世界的商业大亨，拥有卓越的商业头脑和丰富的管理经验。在一次意外中，他穿越到了哈利波特的世界，却发现自己并不会魔法。在这个充满魔法与奇幻的新世界里，亨利必须依靠自己的商业智慧和人脉关系，逐步适应并融入这个全新的社会。
+    - 在2月6日举行的外交部例行记者会上，有外媒记者提问称，美国国防部称在拉丁美洲上空发现了第二个来自中国的气球，并且美方称，在特朗普时期就有来自中国监控气球飞到美国，请问发言人如何回应？
+    - 魔法世界的商业环境:在哈利波特的魔法世界中，商业环境与现代世界截然不同。魔法物品和魔法服务是市场的主要商品，而魔法师们则是主要的消费者群体。亨利需要深入了解这个世界的商业规则和市场需求，才能找到自己在这个新世界中的定位和发展方向。
+    - 亨利与魔法世界的冲突与融合:亨利在魔法世界中的生活充满了挑战和冲突。他必须学会与魔法师们打交道，理解他们的思维方式和行为习惯。同时，他也要努力将自己的商业理念和方法融入到这个魔法世界中，创造出独特的商业模式和竞争优势。在这个过程中，亨利不仅逐渐适应了魔法世界的生活，也找到了自己在这个新世界中的价值和意义。
+    - 魔法世界的商业竞争:魔法世界的商业竞争同样激烈，商家们为了争夺市场份额和客户，纷纷使出浑身解数。有的商家依靠独特的魔法物品吸引顾客，有的则提供个性化的魔法服务满足顾客需求。亨利作为来自现代世界的商业大亨，需要运用自己的商业智慧和创新思维，在魔法世界的商业竞争中脱颖而出。
+  topk: 3
+
+sample_2:
+  model: 'ERNIE-Bot'
+  question: 商业环境不要说空话，要说具体是什么样的环境
+  text_chunks:
+    - 亨利:亨利，原名亨利·沃森，是一位来自现代世界的商业大亨
+    - 外交部例行记者会上
+    - 魔法世界的商业环境
+    - 亨利与魔法世界的冲突与融合
+    - 魔法世界的商业竞争
+  topk: 3
+
+sample_3:
+  model: 'ERNIE-Bot'
+  question: 商业环境不要说空话，要说具体是什么样的环境
+  text_chunks:
+    - 亨利:亨利，原名亨利·沃森，是一位来自现代世界的商业大亨
+    - 外交部例行记者会上
+    - 魔法世界的商业环境
+    - 亨利与魔法世界的冲突与融合
+    - 魔法世界的商业竞争
+  topk: 1
+
+
diff --git "a/prompts/\346\243\200\347\264\242\345\217\202\350\200\203\346\235\220\346\226\231/prompt.jinja2" "b/prompts/\346\243\200\347\264\242\345\217\202\350\200\203\346\235\220\346\226\231/prompt.jinja2"
new file mode 100644
index 0000000000000000000000000000000000000000..609458cb3038c51fd182fd00ac106e7468d417c1
--- /dev/null
+++ "b/prompts/\346\243\200\347\264\242\345\217\202\350\200\203\346\235\220\346\226\231/prompt.jinja2"
@@ -0,0 +1,17 @@
+user:
+###任务
+我会给你参考材料和问题，需要你选出对于给定问题最具有参考价值的参考材料。
+
+###参考材料
+{% for chunk in references %}  
+（{{ loop.index }}）{{ chunk }} 
+{% endfor %}
+
+###问题
+{{question}}
+
+###输出格式
+//以JSON格式输出
+{ 
+"TOP-{{topk}}": [?, ?, ...] //在列表中按重要性从高到低依次填写{{topk}}个参考材料的序号，填序号即可。
+}
diff --git "a/prompts/\346\243\200\347\264\242\345\217\202\350\200\203\346\235\220\346\226\231/prompt.py" "b/prompts/\346\243\200\347\264\242\345\217\202\350\200\203\346\235\220\346\226\231/prompt.py"
new file mode 100644
index 0000000000000000000000000000000000000000..e97f220195de3d639b78e67eab03e8bf9bf6a883
--- /dev/null
+++ "b/prompts/\346\243\200\347\264\242\345\217\202\350\200\203\346\235\220\346\226\231/prompt.py"
@@ -0,0 +1,43 @@
+import json
+import os
+from prompts.chat_utils import chat
+from prompts.prompt_utils import load_jinja2_template, match_first_json_block
+
+
+def parser(response_msgs, text_chunks, topk):
+    content = response_msgs[-1]['content']
+
+    try:
+        content = match_first_json_block(content)
+        content_json = json.loads(content)
+        if content_json and isinstance(topk_indexes := next(iter(content_json.values())), list):
+                topk_indexes = [int(e) - 1 for e in topk_indexes[:topk]]
+                if all(0 <= e < len(text_chunks) for e in topk_indexes):
+                    return topk_indexes[:topk]
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+    
+    return None
+
+
+def main(model, question, text_chunks, topk):
+    template = load_jinja2_template(os.path.join(os.path.dirname(os.path.join(__file__)), "prompt.jinja2"))
+
+    prompt = template.render(references=text_chunks, 
+                             question=question,
+                             topk=topk)
+    
+    for response_msgs in chat([], prompt, model, max_tokens=10 + topk * 4, response_json=True, parse_chat=True):
+        try: 
+            match_first_json_block(response_msgs[-1]['content'])
+        except Exception: 
+            pass
+        else:
+            topk_indexes = parser(response_msgs, text_chunks, topk)
+            return {'topk_indexes': topk_indexes, 'response_msgs':response_msgs}
+
+        yield response_msgs
+    
+
+
diff --git "a/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\344\270\212\344\270\213\346\226\207/flow.dag.yaml" "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\344\270\212\344\270\213\346\226\207/flow.dag.yaml"
new file mode 100644
index 0000000000000000000000000000000000000000..59c639da55435302be129455ddaf58f64bf96bc8
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\344\270\212\344\270\213\346\226\207/flow.dag.yaml"
@@ -0,0 +1,51 @@
+$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
+environment:
+  python_requirements_txt: requirements.txt
+inputs:
+  chat_messages:
+    type: list
+    default: []
+  model:
+    type: string
+    default: ERNIE-Bot
+  config:
+    type: object
+    default:
+      auto_compress_context: true
+  text:
+    type: string
+  context:
+    type: string
+outputs:
+  knowledge:
+    type: string
+    reference: ${parser.output}
+nodes:
+- name: prompt
+  type: prompt
+  source:
+    type: code
+    path: prompt.jinja2
+  inputs:
+    text: ${inputs.text}
+    context: ${inputs.context}
+- name: chat
+  type: python
+  source:
+    type: code
+    path: ../tool_chat.py
+  inputs:
+    messages: ${inputs.chat_messages}
+    prompt: ${prompt.output}
+    model: ${inputs.model}
+    response_json: false
+    parse_chat: true
+  aggregation: false
+- name: parser
+  type: python
+  source:
+    type: code
+    path: ../tool_parser.py
+  inputs:
+    response_type: content
+    response_msgs: ${chat.output}
diff --git "a/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\344\270\212\344\270\213\346\226\207/prompt.jinja2" "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\344\270\212\344\270\213\346\226\207/prompt.jinja2"
new file mode 100644
index 0000000000000000000000000000000000000000..d2ea5e245891ad74b33640e60677f49074361704
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\344\270\212\344\270\213\346\226\207/prompt.jinja2"
@@ -0,0 +1,14 @@
+{# 该Prompt用于向章节Agent提问来获取上下文，输入：正文 输出：上下文 #}
+user:
+**任务**
+你现在是一个小说家，正在创作正文，你需要从下面上下文中提取并概括出对创作正文最有帮助的信息。
+
+**上下文**
+{{context}}
+
+**正文片段**
+{{text}}
+
+**输出格式**
+### 提取信息
+（这里从上下文中提取出与正文最相关的信息，用简洁精确的语言描述）
\ No newline at end of file
diff --git "a/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\344\270\212\344\270\213\346\226\207/prompt.py" "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\344\270\212\344\270\213\346\226\207/prompt.py"
new file mode 100644
index 0000000000000000000000000000000000000000..d6565138e84d5d53130d4cdd03bc88fc8f701e02
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\344\270\212\344\270\213\346\226\207/prompt.py"
@@ -0,0 +1,21 @@
+import os
+from prompts.chat_utils import chat
+from prompts.prompt_utils import load_jinja2_template
+from prompts.common_parser import parse_content as parser
+
+
+def main(model, text, context):
+    template = load_jinja2_template(os.path.join(os.path.dirname(os.path.join(__file__)), "prompt.jinja2"))
+
+    prompt = template.render(text=text, 
+                             context=context)
+    
+    response_msgs = yield from chat([], prompt, model, parse_chat=True)
+
+    knowledge = parser(response_msgs)
+
+    return {'knowledge': knowledge, 'response_msgs':response_msgs}
+
+
+
+
diff --git "a/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\346\204\217\350\247\201/flow.dag.yaml" "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\346\204\217\350\247\201/flow.dag.yaml"
new file mode 100644
index 0000000000000000000000000000000000000000..21b6f31270321f0ee5c1dc2477208bc886d00f13
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\346\204\217\350\247\201/flow.dag.yaml"
@@ -0,0 +1,58 @@
+$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
+environment:
+  python_requirements_txt: requirements.txt
+inputs:
+  chat_messages:
+    type: list
+    default: []
+  model:
+    type: string
+    default: ERNIE-Bot-4
+  config:
+    type: object
+    default:
+      auto_compress_context: true
+  instruction:
+    type: string
+  context:
+    type: string
+  text:
+    type: string
+    default: ""
+  selected_text:
+    type: string
+    default: ""
+outputs:
+  suggestion:
+    type: string
+    reference: ${parser.output}
+nodes:
+- name: prompt
+  type: prompt
+  source:
+    type: code
+    path: prompt.jinja2
+  inputs:
+    context: ${inputs.context}
+    instruction: ${inputs.instruction}
+    text: ${inputs.text}
+    selected_text: ${inputs.selected_text}
+- name: chat
+  type: python
+  source:
+    type: code
+    path: ../tool_chat.py
+  inputs:
+    messages: ${inputs.chat_messages}
+    prompt: ${prompt.output}
+    model: ${inputs.model}
+    response_json: false
+    parse_chat: true
+  aggregation: false
+- name: parser
+  type: python
+  source:
+    type: code
+    path: parser.py
+  inputs:
+    response_msgs: ${chat.output}
diff --git "a/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\346\204\217\350\247\201/parser.py" "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\346\204\217\350\247\201/parser.py"
new file mode 100644
index 0000000000000000000000000000000000000000..c0ccc0e46d58112b2d7edf144331c8b89a61e4b4
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\346\204\217\350\247\201/parser.py"
@@ -0,0 +1,13 @@
+from promptflow.core import tool
+
+
+@tool
+def parse_response(response_msgs):
+    from prompts.prompt_utils import parse_chunks_by_separators
+    content = response_msgs[-1]['content']
+
+    chunks = parse_chunks_by_separators(content, [r'\S*', ])
+    if "意见" in chunks:
+        return chunks["意见"]
+    else:
+        return content
diff --git "a/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\346\204\217\350\247\201/prompt.jinja2" "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\346\204\217\350\247\201/prompt.jinja2"
new file mode 100644
index 0000000000000000000000000000000000000000..06dee521b1ad4eab40b1f79fb4aec58d70ff592e
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\346\204\217\350\247\201/prompt.jinja2"
@@ -0,0 +1,34 @@
+system:
+**任务**
+你是一个网文编辑，正在指导写手进行正文的创作。
+你需要针对写手提出的要求进行分析，给出具体的创作正文的意见。
+
+**输入**
+要求
+上下文
+
+user:
+**上下文**
+{{context}}
+
+**正文**
+{{text}}
+
+assistant:
+收到了你发来的正文和上下文，还需要说明你的要求。
+
+user:
+**要求**
+{{instruction}}
+
+{% if selected_text %}
+**引用正文片段**
+{{selected_text}}
+{% endif %}
+
+**输出格式**
+###思考
+（直击核心，简明扼要地揭示问题关键）
+
+###意见
+（精准、犀利地给出你的主要观点或建议，力求一语中的）
diff --git "a/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\346\204\217\350\247\201/prompt.py" "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\346\204\217\350\247\201/prompt.py"
new file mode 100644
index 0000000000000000000000000000000000000000..bce2e7d68efa5464454f9ffb5be7c23533999d27
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\346\255\243\346\226\207\347\232\204\346\204\217\350\247\201/prompt.py"
@@ -0,0 +1,26 @@
+import os
+from prompts.chat_utils import chat
+from prompts.prompt_utils import load_jinja2_template
+from prompts.common_parser import parse_named_chunk
+
+
+def parser(response_msgs):
+    """Return parse_named_chunk's result for the chunk named '意见'."""
+    return parse_named_chunk(response_msgs, '意见')
+
+def main(model, instruction, text, context, selected_text=None):
+    template = load_jinja2_template(os.path.join(os.path.dirname(os.path.join(__file__)), "prompt.jinja2"))
+
+    prompt = template.render(instruction=instruction, 
+                             text=text,
+                             context=context,
+                             selected_text=selected_text)
+    
+    response_msgs = yield from chat([], prompt, model, parse_chat=True)
+
+    suggestion = parser(response_msgs)
+
+    return {'suggestion': suggestion, 'response_msgs':response_msgs}
+    
+
+
diff --git "a/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\344\270\212\344\270\213\346\226\207/flow.dag.yaml" "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\344\270\212\344\270\213\346\226\207/flow.dag.yaml"
new file mode 100644
index 0000000000000000000000000000000000000000..59c639da55435302be129455ddaf58f64bf96bc8
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\344\270\212\344\270\213\346\226\207/flow.dag.yaml"
@@ -0,0 +1,51 @@
+$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
+environment:
+  python_requirements_txt: requirements.txt
+inputs:
+  chat_messages:
+    type: list
+    default: []
+  model:
+    type: string
+    default: ERNIE-Bot
+  config:
+    type: object
+    default:
+      auto_compress_context: true
+  text:
+    type: string
+  context:
+    type: string
+outputs:
+  knowledge:
+    type: string
+    reference: ${parser.output}
+nodes:
+- name: prompt
+  type: prompt
+  source:
+    type: code
+    path: prompt.jinja2
+  inputs:
+    text: ${inputs.text}
+    context: ${inputs.context}
+- name: chat
+  type: python
+  source:
+    type: code
+    path: ../tool_chat.py
+  inputs:
+    messages: ${inputs.chat_messages}
+    prompt: ${prompt.output}
+    model: ${inputs.model}
+    response_json: false
+    parse_chat: true
+  aggregation: false
+- name: parser
+  type: python
+  source:
+    type: code
+    path: ../tool_parser.py
+  inputs:
+    response_type: content
+    response_msgs: ${chat.output}
diff --git "a/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\344\270\212\344\270\213\346\226\207/prompt.jinja2" "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\344\270\212\344\270\213\346\226\207/prompt.jinja2"
new file mode 100644
index 0000000000000000000000000000000000000000..5a8a919ebd531d34f441a4860930ea3b2c133f1c
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\344\270\212\344\270\213\346\226\207/prompt.jinja2"
@@ -0,0 +1,14 @@
+{# 该Prompt用于向大纲Agent提问来获取上下文，输入：章节剧情 输出：上下文 #}
+user:
+**任务**
+你现在是一个小说家，正在创作章节剧情，你需要从下面上下文中提取并概括出对创作章节剧情最有帮助的信息。
+
+**上下文**
+{{context}}
+
+**章节剧情片段**
+{{text}}
+
+**输出格式**
+### 提取信息
+（这里从上下文中提取出与章节剧情最相关的信息，用简洁精确的语言描述）
\ No newline at end of file
diff --git "a/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\344\270\212\344\270\213\346\226\207/prompt.py" "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\344\270\212\344\270\213\346\226\207/prompt.py"
new file mode 100644
index 0000000000000000000000000000000000000000..d6565138e84d5d53130d4cdd03bc88fc8f701e02
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\344\270\212\344\270\213\346\226\207/prompt.py"
@@ -0,0 +1,21 @@
+import os
+from prompts.chat_utils import chat
+from prompts.prompt_utils import load_jinja2_template
+from prompts.common_parser import parse_content as parser
+
+
+def main(model, text, context):
+    template = load_jinja2_template(os.path.join(os.path.dirname(os.path.join(__file__)), "prompt.jinja2"))
+
+    prompt = template.render(text=text, 
+                             context=context)
+    
+    response_msgs = yield from chat([], prompt, model, parse_chat=True)
+
+    knowledge = parser(response_msgs)
+
+    return {'knowledge': knowledge, 'response_msgs':response_msgs}
+
+
+
+
diff --git "a/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/flow.dag.yaml" "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/flow.dag.yaml"
new file mode 100644
index 0000000000000000000000000000000000000000..f44890f02be0ba3aa88f4f56548f3cfe56e696e3
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/flow.dag.yaml"
@@ -0,0 +1,50 @@
+$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
+environment:
+  python_requirements_txt: requirements.txt
+inputs:
+  chat_messages:
+    type: list
+    default: []
+  model:
+    type: string
+    default: ERNIE-Bot-4
+  config:
+    type: object
+    default:
+      auto_compress_context: true
+  instruction:
+    type: string
+  context:
+    type: string
+outputs:
+  suggestion:
+    type: string
+    reference: ${parser.output}
+nodes:
+- name: prompt
+  type: prompt
+  source:
+    type: code
+    path: prompt.jinja2
+  inputs:
+    context: ${inputs.context}
+    instruction: ${inputs.instruction}
+- name: chat
+  type: python
+  source:
+    type: code
+    path: ../tool_chat.py
+  inputs:
+    messages: ${inputs.chat_messages}
+    prompt: ${prompt.output}
+    model: ${inputs.model}
+    response_json: false
+    parse_chat: true
+  aggregation: false
+- name: parser
+  type: python
+  source:
+    type: code
+    path: parser.py
+  inputs:
+    response_msgs: ${chat.output}
diff --git "a/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/parser.py" "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/parser.py"
new file mode 100644
index 0000000000000000000000000000000000000000..c0ccc0e46d58112b2d7edf144331c8b89a61e4b4
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/parser.py"
@@ -0,0 +1,13 @@
+from promptflow.core import tool
+
+
+@tool
+def parse_response(response_msgs):
+    from prompts.prompt_utils import parse_chunks_by_separators
+    content = response_msgs[-1]['content']
+
+    chunks = parse_chunks_by_separators(content, [r'\S*', ])
+    if "意见" in chunks:
+        return chunks["意见"]
+    else:
+        return content
diff --git "a/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/prompt.jinja2" "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/prompt.jinja2"
new file mode 100644
index 0000000000000000000000000000000000000000..76302b002dc652110612402a06e8160095d1cd0a
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/prompt.jinja2"
@@ -0,0 +1,29 @@
+system:
+**任务**
+你是一个网文编辑，正在指导写手进行章节剧情的创作。
+你需要针对写手提出的要求进行分析，给出具体的创作章节剧情的意见。
+
+**输入**
+要求
+上下文
+
+user:
+**要求**
+{{instruction}}
+
+assistant:
+明白你的要求，还要给出上下文。
+
+user:
+**要求**
+{{instruction}}
+
+**上下文**
+{{context}}
+
+**输出格式**
+###思考
+（直击核心，简明扼要地揭示问题关键）
+
+###意见
+（精准、犀利地给出你的主要观点或建议，力求一语中的）
diff --git "a/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/prompt.py" "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/prompt.py"
new file mode 100644
index 0000000000000000000000000000000000000000..fec9fd2a4b915806d810fda18c0f80d53225d7cf
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/prompt.py"
@@ -0,0 +1,24 @@
+import os
+from prompts.chat_utils import chat
+from prompts.prompt_utils import load_jinja2_template
+from prompts.common_parser import parse_named_chunk
+
+
+def parser(response_msgs):
+    """Return parse_named_chunk's result for the chunk named '意见'."""
+    return parse_named_chunk(response_msgs, '意见')
+
+def main(model, instruction=None, context=None):
+    template = load_jinja2_template(os.path.join(os.path.dirname(os.path.join(__file__)), "prompt.jinja2"))
+
+    prompt = template.render(instruction=instruction, 
+                             context=context)
+    
+    response_msgs = yield from chat([], prompt, model, parse_chat=True)
+
+    suggestion = parser(response_msgs)
+
+    return {'suggestion': suggestion, 'response_msgs':response_msgs}
+    
+
+
diff --git "a/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\350\256\276\345\256\232\347\232\204\346\204\217\350\247\201/data.yaml" "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\350\256\276\345\256\232\347\232\204\346\204\217\350\247\201/data.yaml"
new file mode 100644
index 0000000000000000000000000000000000000000..94c7900d05f6541d2555a951d27b3200683960cb
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\350\256\276\345\256\232\347\232\204\346\204\217\350\247\201/data.yaml"
@@ -0,0 +1,21 @@
+xsample_1:
+  model: 'ERNIE-Bot'
+  instruction: 完善主角和世界相关设定。
+  context: 小说名为《商业大亨穿越到哈利波特世界，但我不会魔法》。
+  chunks:
+    亨利: 主角名为亨利，是商业大亨，但是不会魔法。
+
+sample_2:
+  model: 'ERNIE-Bot'
+  instruction: 商业环境不要说空话，要说具体是什么样的环境
+  chunks:
+    亨利: 亨利，原名亨利·沃森，是一位来自现代世界的商业大亨，拥有卓越的商业头脑和丰富的管理经验。在一次意外中，他穿越到了哈利波特的世界，却发现自己并不会魔法。在这个充满魔法与奇幻的新世界里，亨利必须依靠自己的商业智慧和人脉关系，逐步适应并融入这个全新的社会。
+    魔法世界的商业环境: 在哈利波特的魔法世界中，商业环境与现代世界截然不同。魔法物品和魔法服务是市场的主要商品，而魔法师们则是主要的消费者群体。亨利需要深入了解这个世界的商业规则和市场需求，才能找到自己在这个新世界中的定位和发展方向。
+    亨利与魔法世界的冲突与融合: 亨利在魔法世界中的生活充满了挑战和冲突。他必须学会与魔法师们打交道，理解他们的思维方式和行为习惯。同时，他也要努力将自己的商业理念和方法融入到这个魔法世界中，创造出独特的商业模式和竞争优势。在这个过程中，亨利不仅逐渐适应了魔法世界的生活，也找到了自己在这个新世界中的价值和意义。
+sample_3:
+  model: 'ERNIE-Bot-4'
+  instruction: 商业环境不要说空话，要说具体是什么样的环境
+  chunks:
+    亨利: 亨利，原名亨利·沃森，是一位来自现代世界的商业大亨，拥有卓越的商业头脑和丰富的管理经验。在一次意外中，他穿越到了哈利波特的世界，却发现自己并不会魔法。在这个充满魔法与奇幻的新世界里，亨利必须依靠自己的商业智慧和人脉关系，逐步适应并融入这个全新的社会。
+    魔法世界的商业环境: 在哈利波特的魔法世界中，商业环境与现代世界截然不同。魔法物品和魔法服务是市场的主要商品，而魔法师们则是主要的消费者群体。亨利需要深入了解这个世界的商业规则和市场需求，才能找到自己在这个新世界中的定位和发展方向。
+    亨利与魔法世界的冲突与融合: 亨利在魔法世界中的生活充满了挑战和冲突。他必须学会与魔法师们打交道，理解他们的思维方式和行为习惯。同时，他也要努力将自己的商业理念和方法融入到这个魔法世界中，创造出独特的商业模式和竞争优势。在这个过程中，亨利不仅逐渐适应了魔法世界的生活，也找到了自己在这个新世界中的价值和意义。
\ No newline at end of file
diff --git "a/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\350\256\276\345\256\232\347\232\204\346\204\217\350\247\201/prompt.jinja2" "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\350\256\276\345\256\232\347\232\204\346\204\217\350\247\201/prompt.jinja2"
new file mode 100644
index 0000000000000000000000000000000000000000..27033695d5f0e78da4c91e609672d904f1700b5f
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\350\256\276\345\256\232\347\232\204\346\204\217\350\247\201/prompt.jinja2"
@@ -0,0 +1,45 @@
+system:
+**任务**
+你是一个网文编辑，正在指导写手进行网文设定的创作。
+你需要针对写手提出的要求进行分析，给出具体的创作网文设定的意见。
+
+**输入**
+要求
+上下文
+原设定
+
+{% if context %}
+user:
+**上下文**
+{{context}}
+
+assistant:
+收到了上下文，你还需要给我其他相关信息。
+{% endif %}
+
+{% if chunks %}
+user:
+**原设定**
+```
+{{chunks}}
+```
+
+assistant:
+收到了原设定，你还需要给我其他相关信息。
+{% endif %}
+
+user:
+**要求**
+{% if instruction %}
+{{instruction}}
+{% else %}
+创作设定。
+{% endif %}
+
+**输出格式**
+###思考
+（直击核心，简明扼要地揭示问题关键）
+
+###意见
+（精准、犀利地给出你的主要观点或建议，力求一语中的）
+
diff --git "a/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\350\256\276\345\256\232\347\232\204\346\204\217\350\247\201/prompt.py" "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\350\256\276\345\256\232\347\232\204\346\204\217\350\247\201/prompt.py"
new file mode 100644
index 0000000000000000000000000000000000000000..ab2048a0d8b97bac31dd19c86ce803ce743c1446
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\345\210\233\344\275\234\350\256\276\345\256\232\347\232\204\346\204\217\350\247\201/prompt.py"
@@ -0,0 +1,25 @@
+import os
+from prompts.chat_utils import chat
+from prompts.prompt_utils import load_jinja2_template
+from prompts.common_parser import parse_named_chunk
+
+
+def parser(response_msgs):
+    """Extract the suggestion chunk from the chat response.
+
+    Delegates to ``parse_named_chunk`` with the chunk name '意见', which is
+    the ``###意见`` heading requested by this prompt's output format.
+
+    Args:
+        response_msgs: The messages returned by ``chat``.
+
+    Returns:
+        Whatever ``parse_named_chunk`` returns for the '意见' chunk.
+    """
+    chunk_name = '意见'
+    return parse_named_chunk(response_msgs, chunk_name)
+
+
+def main(model, instruction=None, chunks=None, context=None):
+    """Ask the model for suggestions on creating/revising story settings.
+
+    This is a generator: it re-yields everything ``chat`` yields, and its
+    return value (``StopIteration.value``) is the final result dict.
+
+    Args:
+        model: Forwarded unchanged to ``chat``.
+        instruction: Optional requirement text; when falsy the template
+            falls back to its built-in default requirement.
+        chunks: Optional mapping of setting name -> setting text. Each item
+            is rendered as a ``###name`` section; the whole section is
+            omitted from the prompt when ``chunks`` is falsy.
+        context: Optional context text; omitted from the prompt when falsy.
+
+    Returns:
+        dict with ``'suggestion'`` (the parsed '意见' chunk) and
+        ``'response_msgs'`` (the value returned by ``chat``).
+    """
+    template_path = os.path.join(os.path.dirname(__file__), "prompt.jinja2")
+    template = load_jinja2_template(template_path)
+
+    # Flatten the settings mapping into "###name\ntext" sections.
+    rendered_chunks = None
+    if chunks:
+        sections = [f"###{name}\n{body}" for name, body in chunks.items()]
+        rendered_chunks = "\n\n".join(sections)
+
+    prompt = template.render(
+        instruction=instruction,
+        chunks=rendered_chunks,
+        context=context,
+    )
+
+    response_msgs = yield from chat([], prompt, model, parse_chat=True)
+
+    return {
+        'suggestion': parser(response_msgs),
+        'response_msgs': response_msgs,
+    }
+    
+
+
diff --git "a/prompts/\347\224\237\346\210\220\351\207\215\345\206\231\346\255\243\346\226\207\347\232\204\346\204\217\350\247\201/prompt.jinja2" "b/prompts/\347\224\237\346\210\220\351\207\215\345\206\231\346\255\243\346\226\207\347\232\204\346\204\217\350\247\201/prompt.jinja2"
new file mode 100644
index 0000000000000000000000000000000000000000..90469d8684d42b880ab16543ac6f527efe5d4f75
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\351\207\215\345\206\231\346\255\243\346\226\207\347\232\204\346\204\217\350\247\201/prompt.jinja2"
@@ -0,0 +1,40 @@
+system:
+**任务**
+你是一个网文作家，正在构思正文，你需要在给定提纲的基础上进行创作，并积极响应用户反馈给出修改意见。
+
+
+user:
+**提纲**
+```
+{{chapter}}
+```
+
+**输出格式**
+```
+（在这个三引号块中输出正文）
+```
+
+assistant:
+好的，这是根据提纲创作的正文。
+```
+{{text}}
+```
+
+user:
+我对于你刚刚创作的正文中的某个片段不满意，我需要你给出修改意见。
+给出修改意见时，请注意以下几点：
+1. 直击核心，简明扼要地揭示问题关键
+2. 不要泛泛而谈，具体到对什么剧情/人物/情节/细节的修改意见
+3. 不要给出大段文字，尽量控制在100字以内
+
+
+**正文片段**
+```
+{{selected_text}}
+```
+
+
+**输出格式**
+```
+（在这个三引号块中输出对该正文片段的修改意见）
+```
\ No newline at end of file
diff --git "a/prompts/\347\224\237\346\210\220\351\207\215\345\206\231\346\255\243\346\226\207\347\232\204\346\204\217\350\247\201/prompt.py" "b/prompts/\347\224\237\346\210\220\351\207\215\345\206\231\346\255\243\346\226\207\347\232\204\346\204\217\350\247\201/prompt.py"
new file mode 100644
index 0000000000000000000000000000000000000000..00bf150b24065ecee298d6b0716fbd982d0cfbb6
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\351\207\215\345\206\231\346\255\243\346\226\207\347\232\204\346\204\217\350\247\201/prompt.py"
@@ -0,0 +1,23 @@
+import os
+from prompts.chat_utils import chat
+from prompts.prompt_utils import load_jinja2_template
+from prompts.common_parser import parse_last_code_block as parser
+
+
+# Generate suggestions for rewriting a selected fragment of the main text.
+def main(model, chapter, text, selected_text):
+    """Stream suggestions for revising ``selected_text``.
+
+    The prompt replays the outline and the full text as an earlier exchange
+    and then asks for a suggestion about the selected fragment. Every
+    message list produced by ``chat`` is parsed with ``parser`` (an alias of
+    ``parse_last_code_block``) and yielded right away, so callers see the
+    suggestion grow while the response streams.
+
+    Args:
+        model: Forwarded unchanged to ``chat``.
+        chapter: Outline text rendered into the template's outline block.
+        text: Full text previously written from that outline.
+        selected_text: The fragment the user wants suggestions for.
+
+    Yields:
+        dict with ``'suggestion'`` and ``'response_msgs'`` for each item
+        produced by ``chat``.
+
+    Returns:
+        The last yielded dict, or ``None`` when ``chat`` yields nothing.
+    """
+    template_path = os.path.join(os.path.dirname(__file__), "prompt.jinja2")
+    template = load_jinja2_template(template_path)
+
+    prompt = template.render(chapter=chapter,
+                             text=text,
+                             selected_text=selected_text)
+
+    # Pre-bind the result so an empty chat stream returns None instead of
+    # raising UnboundLocalError at the final return.
+    ret = None
+    for response_msgs in chat([], prompt, model, parse_chat=True):
+        suggestion = parser(response_msgs)
+        ret = {'suggestion': suggestion, 'response_msgs': response_msgs}
+        yield ret
+
+    return ret
+
+
+
diff --git "a/prompts/\347\224\237\346\210\220\351\207\215\345\206\231\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/flow.dag.yaml" "b/prompts/\347\224\237\346\210\220\351\207\215\345\206\231\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/flow.dag.yaml"
new file mode 100644
index 0000000000000000000000000000000000000000..1ed826d153fe04889350bbebfb4fdbbfb6821f8d
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\351\207\215\345\206\231\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/flow.dag.yaml"
@@ -0,0 +1,53 @@
+$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
+environment:
+  python_requirements_txt: requirements.txt
+inputs:
+  chat_messages:
+    type: list
+    default: []
+  model:
+    type: string
+    default: ERNIE-Bot-4
+  config:
+    type: object
+    default:
+      auto_compress_context: true
+  text:
+    type: string
+  instruction:
+    type: string
+  context:
+    type: string
+outputs:
+  suggestion:
+    type: string
+    reference: ${parser.output}
+nodes:
+- name: prompt
+  type: prompt
+  source:
+    type: code
+    path: prompt.jinja2
+  inputs:
+    text: ${inputs.text}
+    context: ${inputs.context}
+    instruction: ${inputs.instruction}
+- name: chat
+  type: python
+  source:
+    type: code
+    path: ../tool_chat.py
+  inputs:
+    messages: ${inputs.chat_messages}
+    prompt: ${prompt.output}
+    model: ${inputs.model}
+    response_json: false
+    parse_chat: true
+  aggregation: false
+- name: parser
+  type: python
+  source:
+    type: code
+    path: parser.py
+  inputs:
+    response_msgs: ${chat.output}
diff --git "a/prompts/\347\224\237\346\210\220\351\207\215\345\206\231\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/parser.py" "b/prompts/\347\224\237\346\210\220\351\207\215\345\206\231\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/parser.py"
new file mode 100644
index 0000000000000000000000000000000000000000..9c168d0541203beb3d6f399eb95233135f14f3de
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\351\207\215\345\206\231\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/parser.py"
@@ -0,0 +1,13 @@
+from promptflow.core import tool
+
+
+@tool
+def parse_response(response_msgs):
+    """Return the "改进意见" (improvement suggestions) section of the reply.
+
+    Args:
+        response_msgs: Chat message list; only the ``'content'`` of the last
+            message is inspected.
+
+    Returns:
+        The value stored under "改进意见" in the parsed chunks.
+
+    Raises:
+        Exception: If the parsed chunks contain no "改进意见" entry.
+    """
+    # Imported at call time, exactly as the tool did before.
+    from prompts.prompt_utils import parse_chunks_by_separators
+
+    wanted = "改进意见"
+    last_content = response_msgs[-1]['content']
+    sections = parse_chunks_by_separators(last_content, [r'\S*', ])
+
+    if wanted not in sections:
+        raise Exception("无法解析回复，找不到改进意见！")
+    return sections[wanted]
diff --git "a/prompts/\347\224\237\346\210\220\351\207\215\345\206\231\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/prompt.jinja2" "b/prompts/\347\224\237\346\210\220\351\207\215\345\206\231\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/prompt.jinja2"
new file mode 100644
index 0000000000000000000000000000000000000000..beb64a421aed38012ba6608efdf317aa94f06dbe
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\351\207\215\345\206\231\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/prompt.jinja2"
@@ -0,0 +1,50 @@
+system:
+**任务**
+你是一个网文编辑，正在审阅写手发来的剧情，并针对写手提出的要求进行分析，给出具体的改进意见。
+
+**输入**
+要求
+原剧情
+上下文
+
+**输出格式**
+### 对原剧情哪些已有描述进行改进
+（这里结合要求，进行详细的分点的有条理的思考，每个点都要符合改进准则）
+
+### 改进意见
+（这里分点给出具体的有针对性的改进意见）
+
+**改进准则**
+1. 不能增加/引入/加入原剧情中没有的事件/概念
+2. 不能在原剧情结尾进行引申或总结
+3. 必须在原剧情中已有的描述上进行改进
+
+user:
+**要求**
+{{instruction}}
+
+assistant:
+明白你的要求，还要给出原剧情和上下文。
+
+user:
+**要求**
+{{instruction}}
+
+**原剧情**
+{{text}}
+
+**上下文**
+{{context}}
+
+**改进准则**
+1. 不能增加/引入/加入原剧情中没有的事件/概念
+2. 不能在原剧情结尾进行引申或总结
+3. 必须在原剧情中已有的描述上进行改进
+{#情节设计时不要苦大仇深，也不要奋发激昂，尽量幽默风趣点。#}
+
+**输出格式**
+### 对原剧情哪些已有描述进行改进
+（这里结合要求，进行详细的分点的有条理的思考，每个点都要符合改进准则）
+
+### 改进意见
+（这里分点给出具体的有针对性的改进意见）
\ No newline at end of file
diff --git "a/prompts/\347\224\237\346\210\220\351\207\215\345\206\231\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/prompt.py" "b/prompts/\347\224\237\346\210\220\351\207\215\345\206\231\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/prompt.py"
new file mode 100644
index 0000000000000000000000000000000000000000..683dd5ca02221e6362490d5007cb8b2c1ad601ee
--- /dev/null
+++ "b/prompts/\347\224\237\346\210\220\351\207\215\345\206\231\347\253\240\350\212\202\347\232\204\346\204\217\350\247\201/prompt.py"
@@ -0,0 +1,25 @@
+import os
+from prompts.chat_utils import chat
+from prompts.prompt_utils import load_jinja2_template
+from prompts.common_parser import parse_named_chunk
+
+
+def parser(response_msgs):
+    """Extract the suggestion chunk from the chat response.
+
+    Delegates to ``parse_named_chunk`` with the chunk name '意见'.
+
+    NOTE(review): this prompt's output format uses the heading
+    ``### 改进意见`` and the promptflow ``parser.py`` next to this file looks
+    up "改进意见"; confirm that ``parse_named_chunk`` matches that heading
+    when given '意见'.
+
+    Args:
+        response_msgs: The messages returned by ``chat``.
+
+    Returns:
+        Whatever ``parse_named_chunk`` returns for the '意见' chunk.
+    """
+    chunk_name = '意见'
+    return parse_named_chunk(response_msgs, chunk_name)
+
+
+def main(model, instruction, text, context):
+    """Ask the model for suggestions on rewriting a chapter plot.
+
+    This is a generator: it re-yields everything ``chat`` yields, and its
+    return value (``StopIteration.value``) is the final result dict.
+
+    Args:
+        model: Forwarded unchanged to ``chat``.
+        instruction: Requirement text rendered into the template.
+        text: The original plot rendered into the template.
+        context: Context text rendered into the template.
+
+    Returns:
+        dict with ``'suggestion'`` (the chunk returned by ``parser``) and
+        ``'response_msgs'`` (the value returned by ``chat``).
+    """
+    template_path = os.path.join(os.path.dirname(__file__), "prompt.jinja2")
+    template = load_jinja2_template(template_path)
+
+    prompt = template.render(
+        instruction=instruction,
+        text=text,
+        context=context,
+    )
+
+    response_msgs = yield from chat([], prompt, model, parse_chat=True)
+
+    return {
+        'suggestion': parser(response_msgs),
+        'response_msgs': response_msgs,
+    }
+    
+
+
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4178466bfc15c6ffce1d7aa6e16b570890780f1d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,13 @@
+flask
+flask-cors
+gunicorn
+openai
+qianfan
+pymongo
+rich
+spark_ai_python
+zhipuai
+flask
+flask-cors
+numpy
+chardet
\ No newline at end of file
diff --git a/setting.py b/setting.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3a064b7a4396744b4b1c3d764ae18a47658180f
--- /dev/null
+++ b/setting.py
@@ -0,0 +1,46 @@
+from flask import Blueprint, jsonify, request
+
+setting_bp = Blueprint('setting', __name__)
+
+@setting_bp.route('/setting', methods=['GET'])
+def get_settings():
+    """Return the default settings and the available models as JSON.
+
+    The payload holds ``models`` (provider name -> that provider's
+    ``available_models`` entry, for providers that define one) together with
+    the default main/sub model and the ``MAX_THREAD_NUM`` /
+    ``MAX_NOVEL_SUMMARY_LENGTH`` values read from ``config``.
+    """
+    # Imported at call time, as in the original handler.
+    from config import (
+        API_SETTINGS,
+        DEFAULT_MAIN_MODEL,
+        DEFAULT_SUB_MODEL,
+        MAX_THREAD_NUM,
+        MAX_NOVEL_SUMMARY_LENGTH,
+    )
+
+    # Providers without an 'available_models' entry are left out.
+    models_by_provider = {}
+    for provider_name, provider_cfg in API_SETTINGS.items():
+        if 'available_models' in provider_cfg:
+            models_by_provider[provider_name] = provider_cfg['available_models']
+
+    return jsonify({
+        'models': models_by_provider,
+        'MAIN_MODEL': DEFAULT_MAIN_MODEL,
+        'SUB_MODEL': DEFAULT_SUB_MODEL,
+        'MAX_THREAD_NUM': MAX_THREAD_NUM,
+        'MAX_NOVEL_SUMMARY_LENGTH': MAX_NOVEL_SUMMARY_LENGTH,
+    })
+
+@setting_bp.route('/test_model', methods=['POST'])
+def test_model():
+    """Check whether a model configuration works by running a test chat.
+
+    Reads ``provider_model`` from the JSON request body, resolves it to a
+    model config and drains ``test_stream_chat``; the last streamed message
+    is reported back.
+
+    Returns:
+        ``{'success': True, 'response': <last message>}`` on success, or
+        ``({'success': False, 'error': <message>}, 500)`` on any exception.
+        Request-parsing failures are caught by the same handler, so they are
+        reported as HTTP 500 as well.
+    """
+    try:
+        payload = request.get_json()
+        provider_model = payload.get('provider_model')
+
+        from backend_utils import get_model_config_from_provider_model
+        model_config = get_model_config_from_provider_model(provider_model)
+
+        from llm_api import test_stream_chat
+        # Drain the stream; only the final message is returned to the client.
+        last_msg = None
+        for last_msg in test_stream_chat(model_config):
+            pass
+
+        return jsonify({'success': True, 'response': last_msg})
+    except Exception as e:
+        failure = {'success': False, 'error': str(e)}
+        return jsonify(failure), 500
diff --git a/summary.py b/summary.py
new file mode 100644
index 0000000000000000000000000000000000000000..80122b09b72ba0a3d9df091549a1542ae237cc1f
--- /dev/null
+++ b/summary.py
@@ -0,0 +1,118 @@
+import time
+from core.parser_utils import parse_chapters
+from core.summary_novel import summary_draft, summary_plot, summary_chapters
+from config import MAX_NOVEL_SUMMARY_LENGTH, MAX_THREAD_NUM, ENABLE_ONLINE_DEMO
+
+def batch_yield(generators, max_co_num=5, ret=None):
+    """Drive several generators round-robin and collect their return values.
+
+    In every round the unfinished generators are visited in order and each
+    is advanced by one step; the round stops early once ``max_co_num``
+    generators have been advanced. After a round in which at least one
+    generator is still unfinished, the list of latest yielded values is
+    yielded to the caller.
+
+    Args:
+        generators: Sequence of generator objects to drive.
+        max_co_num: Maximum number of generators advanced per round.
+        ret: Optional list that receives the generators' return values (it
+            is cleared first). A fresh list is created when omitted, so
+            separate calls never share a result list.
+
+    Yields:
+        A list with one slot per generator holding the last value that
+        generator yielded (``None`` if it has not yielded yet). The same
+        list object is yielded every round and updated in place, so copy it
+        if a snapshot is needed.
+
+    Returns:
+        ``ret``, filled with each generator's return value in input order.
+    """
+    # A mutable default would be shared by every call that omits ``ret``.
+    if ret is None:
+        ret = []
+
+    total = len(generators)
+    results = [None] * total
+    yields = [None] * total
+    finished = [False] * total
+
+    while True:
+        co_num = 0
+        for i, gen in enumerate(generators):
+            if finished[i]:
+                continue
+
+            # Count the generator even if this step turns out to finish it.
+            co_num += 1
+            try:
+                yields[i] = next(gen)
+            except StopIteration as e:
+                results[i] = e.value
+                finished[i] = True
+
+            if co_num >= max_co_num:
+                break
+
+        if all(finished):
+            break
+
+        yield yields
+
+    ret.clear()
+    ret.extend(results)
+    return ret
+
+def _progress_message(stage, yields, show_count=True):
+    """Build the progress dict for one ``batch_yield`` round.
+
+    Returns ``None`` when no generator has reported a progress entry yet, so
+    the caller can skip the round.
+    """
+    active = [e for e in yields if e is not None]
+    if not active:
+        return None
+
+    chars_num = sum(e['chars_num'] for e in active)
+    current_cost = sum(e['current_cost'] for e in active)
+    # Model name and currency symbol are taken from the first entry.
+    currency_symbol = active[0]['currency_symbol']
+    model_text = active[0]['model']
+
+    count_part = f" 进度：{len(active)} / {len(yields)}" if show_count else ""
+    return {"progress_msg": f"{stage}{count_part} 模型：{model_text} 已生成字符：{chars_num} 已花费：{current_cost:.4f}{currency_symbol}"}
+
+
+def process_novel(content, novel_name, model, sub_model, max_novel_summary_length, max_thread_num):
+    """Summarize an imported novel into draft, plot and outline data.
+
+    This is a generator. It yields ``{"progress_msg": ...}`` dicts while
+    working and finally yields one dict that also carries the ``"outline"``,
+    ``"plot"`` and ``"draft"`` results.
+
+    Args:
+        content: Full text of the novel.
+        novel_name: Novel title, passed to ``summary_chapters``.
+        model: Main model, passed to the summary functions.
+        sub_model: Secondary model, passed to the summary functions.
+        max_novel_summary_length: Text beyond this many characters is
+            dropped before processing.
+        max_thread_num: Passed to ``batch_yield`` as ``max_co_num``.
+
+    Raises:
+        Exception: In online-demo mode when either limit exceeds the
+            configured maximum, or when no chapter can be parsed.
+    """
+    if ENABLE_ONLINE_DEMO:
+        if max_novel_summary_length > MAX_NOVEL_SUMMARY_LENGTH:
+            raise Exception("在线Demo模型下，最大小说长度不能超过" + str(MAX_NOVEL_SUMMARY_LENGTH) + "个字符！")
+        if max_thread_num > MAX_THREAD_NUM:
+            raise Exception("在线Demo模型下，最大线程数不能超过" + str(MAX_THREAD_NUM) + "！")
+
+    if len(content) > max_novel_summary_length:
+        content = content[:max_novel_summary_length]
+        yield {"progress_msg": f"小说长度超出最大处理长度，已截断，只处理前{max_novel_summary_length}个字符。"}
+        # NOTE(review): presumably keeps the truncation notice visible for a
+        # moment before the next message replaces it - confirm.
+        time.sleep(1)
+
+    # Parse chapters
+    yield {"progress_msg": "正在解析章节..."}
+
+    chapter_titles, chapter_contents = parse_chapters(content)
+
+    yield {"progress_msg": "解析出章节数：" + str(len(chapter_titles))}
+
+    if len(chapter_titles) == 0:
+        raise Exception("解析出章节数为0！！！")
+
+    # Process draft summaries (one generator per chapter)
+    yield {"progress_msg": "正在生成剧情摘要..."}
+    dw_list = []
+    gens = [summary_draft(model, sub_model, ' '.join(title), chapter_text)
+            for title, chapter_text in zip(chapter_titles, chapter_contents)]
+    for yields in batch_yield(gens, ret=dw_list, max_co_num=max_thread_num):
+        msg = _progress_message("正在生成剧情摘要", yields)
+        # Skip rounds without any progress entry; calling next() on an empty
+        # generator expression here would surface as RuntimeError (PEP 479).
+        if msg is not None:
+            yield msg
+
+    # Process plot summaries (one generator per chapter draft)
+    yield {"progress_msg": "正在生成章节大纲..."}
+    cw_list = []
+    gens = [summary_plot(model, sub_model, ' '.join(title), dw.x)
+            for title, dw in zip(chapter_titles, dw_list)]
+    for yields in batch_yield(gens, ret=cw_list, max_co_num=max_thread_num):
+        msg = _progress_message("正在生成章节大纲", yields)
+        if msg is not None:
+            yield msg
+
+    # Process chapter summaries (a single generator for the whole book)
+    yield {"progress_msg": "正在生成全书大纲..."}
+    ow_list = []
+    gens = [summary_chapters(model, sub_model, novel_name, chapter_titles,
+                             [cw.global_context['chapter'] for cw in cw_list])]
+    for yields in batch_yield(gens, ret=ow_list, max_co_num=max_thread_num):
+        msg = _progress_message("正在生成全书大纲", yields, show_count=False)
+        if msg is not None:
+            yield msg
+
+    # Prepare final response
+    outline = ow_list[0]
+    plot_data = {}
+    draft_data = {}
+
+    chapter_outlines = [e[1] for e in outline.xy_pairs]
+    for title, chapter_outline, cw, dw in zip(chapter_titles, chapter_outlines, cw_list, dw_list):
+        chapter_name = ' '.join(title)
+        plot_data[chapter_name] = {
+            'chunks': [('', e) for e, _ in dw.xy_pairs],
+            # cw.global_context['chapter'] is not used here because it does
+            # not include the chapter name.
+            'context': chapter_outline
+        }
+        draft_data[chapter_name] = {
+            'chunks': dw.xy_pairs,
+            'context': ''  # Draft doesn't have global context
+        }
+
+    final_response = {
+        "progress_msg": "处理完成！",
+        "outline": {
+            "chunks": outline.xy_pairs,
+            "context": outline.global_context['outline']
+        },
+        "plot": plot_data,
+        "draft": draft_data
+    }
+
+    yield final_response