Spaces:

longquan
/

cobol_analysis

Paused

App Files Files Community

qiulongquan committed on Jan 28, 2024

Commit

1e88633

1 Parent(s): 2a1c702

Add application file

Browse files

Files changed (3) hide show

1.png +0 -0
cobol_analysis_with_azure.py +221 -0
config.json +7 -0

1.png ADDED Viewed

cobol_analysis_with_azure.py ADDED Viewed

	@@ -0,0 +1,221 @@

+import os
+import openai
+import json
+import tiktoken
+import gradio as gr
+import time
+"""
+使用azure openai作为GPT模型
+进行cobol代码分析
+UI采用gradio框架
+UI使用chatbot进行交互
+已经实现chatbot的交互问答以及历史记录显示和历史内容保存
+chatbot上面不显示prompt内容
+实现稳定输出和创造性输出的切换
+TODO:
+1.还需要一个 stop 生成
+2.流式stream输出
+3.few-shot learning sample
+"""
+# max_response_tokens controls the maximum length of the model's reply
+max_response_tokens = 8000
+# Conversation as displayed in the chatbot: [shown user text, assistant reply] pairs.
+# This is a module-level list, so it is shared by every call to greet().
+history_show = []
+# Sampling parameters sent with each request; overwritten by radio_change().
+temperature=0.5
+top_p=0.95
+# Load config values
+# NOTE(review): config.json also supplies the API key read below; presumably it
+# should be kept out of version control. Confirm.
+with open('config.json') as config_file:
+    config_details = json.load(config_file)
+# Setting up the deployment name. Note: this is the Azure OpenAI deployment name, not the model name.
+chatgpt_model_name = config_details['CHATGPT_MODEL']
+openai.api_type = "azure"
+# The API key for your Azure OpenAI resource.
+openai.api_key = config_details['OPENAI_API_KEY']
+# The base URL for your Azure OpenAI resource. e.g. "https://<your resource name>.openai.azure.com"
+openai.api_base = config_details['OPENAI_API_BASE']
+# Currently Chat Completions API have the following versions available: 2023-03-15-preview
+openai.api_version = config_details['OPENAI_API_VERSION']
+def radio_change(choice):
+    global temperature,top_p
+    if choice=="安定出力":
+        temperature=0.5
+        top_p=0.95
+    elif choice=="積極出力":
+        temperature=0.7
+        top_p=0.95
+# Defining a function to send the prompt to the ChatGPT model
+# More info : https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/chatgpt?pivots=programming-language-chat-completions
+def cobol_analysis_process(history, messages, model_name, max_response_tokens=500):
+    print("temperature=",temperature,"top_p=",top_p)
+    response = openai.ChatCompletion.create(
+        engine=model_name,
+        messages=messages,
+        temperature=temperature,
+        top_p=top_p,
+        # temperature=0.7,
+        # top_p=0.95,
+        max_tokens=max_response_tokens,
+        frequency_penalty=0,
+        presence_penalty=0,
+        # stop="非非",
+        stream=True,
+    )
+    # print("response",response)
+    print("===========history",history)
+    history[-1][1] = ""
+    history_show[-1][1] = ""
+    for response_ in response:
+        for choice in response_.choices:
+            history[-1][1] += choice.delta.content if "content" in choice.delta else ""
+            history_show[-1][1] += choice.delta.content if "content" in choice.delta else ""
+# Defining a function to print out the conversation in a readable format
+# def print_conversation(messages):
+#     for message in messages:
+#         print(f"[{message['role'].upper()}]")
+#         print(message['content'])
+#         print()
+def preprocess(history):
+    # print("history",history)
+    base_system_message = "あなたは優秀なCOBOLコード分析者です。あなたの仕事は要件に基づいてCOBOLコードを分析し、結果を出力することです。結果は日本語で出力する必要があります。"
+    messages=[{"role": "system", "content": base_system_message}]
+    for content in history:
+        messages.append({"role": "user", "content": content[0]})
+        if content[1] is not None:
+            messages.append({"role": "assistant", "content": content[1]})
+    print("messages",messages)
+    # response = cobol_analysis_process(messages, chatgpt_model_name, max_response_tokens)
+    # history[-1] = (history[-1][0], response)
+    cobol_analysis_process(history,messages, chatgpt_model_name, max_response_tokens)
+    # print_conversation(messages)
+# 点击【提出】按钮后调用greet函数进行处理
+def greet(history,user_input,analysis_options):
+    # print("==========analysis_options=============",analysis_options)
+    analysis_content=""
+    if analysis_options=="全体概要 入出力 COPY句 サブルーチン解析":
+        analysis_content="""
+        1.概述一下这个程序主要做了什么,全体程序的数据流程以及每个模块的主要内容。全体概要进行说明并使用table表格输出内容。\n
+        2.程序中所有的入力参数和出力参数,要求使用table表格分别表示,要求每一个对象要有简要的介绍。要再次确认不能有遗漏项目。\n
+        3.程序中所有的COPY句(COPY文),总结成list表格显示。要求每一个对象要有简要的介绍。要再次确认不能有遗漏项目,所有的COPY句都要总结并在list中输出。\n
+        4.全体程序中使用的子程序,包括CALL呼叫的子程序,调用外部文件的子程序。这些子程序总结成list表格显示。要求每一个对象要有简要的介绍。要再次确认不能有遗漏项目。
+        """
+    elif analysis_options=="データ定義分析":
+        analysis_content="""
+        1.要求分析每一行COBOL代码,不能遗漏任何数据定义行,分析内容使用table表格输出
+        2.数据定义内容输出格式[等级][项目名][数据类型][长度][初期値]
+        3.PIC Xデータ型は文字型，PIC 9データ型は数値型
+        """
+    elif analysis_options=="IF ELSE END解析":
+        analysis_content="""
+        要求：根据下面的要求以及分析例子分析上面的COBOL代码并使用table表格输出结果
+        1. 分析每一行cobol代码
+        2. 分析WHILE语句中条件内容
+        3. 全部IF ... OR ... ELSE ... END条件语句中条件，变量名，变量数值或者字段内容变化，MOVEコマンド内容，DISPLAY显示的内容，VCALL调用的子程序内容，PERFORM调用内容，RETURN返回内容。这些内容要使用table表格简要表示(tabel列内容包括 [行番号],[コマンド/条件],[層級],[変数名],[変数の変化],[MOVEコマンド内容],[DISPLAY内容],[CALL内容],[PERFORM内容],[RETURN内容])
+        4. [コマンド/条件]列需要把条件语句的全部内容都写入，条件语句结束标志END和ELSE需要单独一行加入[コマンド/条件]列，嵌套多层IF条件语句中的每一个ELSE，END都不能省略。
+        5. 程序中注释的语句不需要分析，不需要输出结果
+        6. 如果有嵌套IF ... ELSE ... IF ... ELSE ... END ... END 需要table中明确表示层级关系
+        7. 如果是同级别IF ... ELSE ... END table中层级关系数字相同
+        8. 如果有嵌套 WHILE 需要table中明确表示层级关系
+        9. CASE OF END语句不要表示[層級]数值
+        10. 如果是同级别WHILE, table中层级关系数字相同
+        11. RETURN: S 表示程序终了，在[RETURN内容]列输出[プログラム終了]
+        12. DISPLAY语句需要把全部内容显示在[DISPLAY内容]列，不能遗漏内容
+            例：DISPLAY "FMクブン エラー4 HINCODE = " L-HINCODE
+            输出 '"FMクブン エラー4 HINCODE = " L-HINCODE'
+        13. [変数の変化]列需要明确表示变数的变化状况。
+            例：IF: NB-CNT > 0
+            输出 NB-CNTが0より大きい場合
+            例：IF: L-FM = "1"
+            输出 L-FMが1となる場合
+        """
+    elif analysis_options=="TABLE COND ACT END解析":
+        analysis_content="""
+        要求分析每一行cobol代码,结果使用table表格显示
+        如果有嵌套TABLE COND ACT END需要table中明确表示层级关系
+        同一个TABLE COND ACT END中所有的层级都相同
+        全部TABLE COND ACT END语句中条件，变量名，判断条件，判断结果。这些内容要使用table表格简要表示(tabel列内容包括 [行番号],[条件],[層級],[変数名],[判断条件],[判断结果])
+        例：
+        005070     TABLE:                                                       MSKSJ010
+        005080      COND:                                                       MSKSJ010
+        005090      NHINW-KBN2 (9) = "1"             :Y,Y,N,N,N:                MSKSJ010
+        005130      ACT:                                                        MSKSJ010
+        005230      NSKD1-KBN12 := "3"               :-,-,-,-,X:                MSKSJ010
+        005240     END:                                                         MSKSJ010
+        [行番号] 005090
+        [条件] NHINW-KBN2 (9) = "1"
+        [層級] 1
+        [変数名] NHINW-KBN2 (9)
+        [判断条件/変数値変化] "1" かどうかのチェック
+        [判断結果] :Y,Y,N,N,N:
+        """
+    elif analysis_options=="コード解析":
+        analysis_content="""分析上面每一行cobol代码，不能有遗漏的代码行，使用table输出结果。table表格的列名[行番号  COBOLコード   コード解析結果]
+        sample 1:
+        clang0 DS_START_PROC SECTION.
+        行番号：clang0
+        COBOLコード：DS_START_PROC SECTION
+        コード解析結果：DS_START_PROCというセクションの開始を宣言しています。
+        sample 2:
+        001120                UNTIL: X = MTOSM2W-KOSU
+        行番号：001120
+        COBOLコード：UNTIL: X = MTOSM2W-KOSU
+        コード解析結果：この行は、XがMTOSM2W-KOSUと等しいまでのループを示しています。
+        """
+    elif analysis_options=="カスタマイズprompt":
+        analysis_content=""
+    history_show.append([analysis_options+"\n\n"+user_input, None])
+    if user_input != "":
+        user_input = user_input+"\n\n"+analysis_content
+    else:
+        user_input = ""
+    print("user_input==========",user_input)
+    history.append([user_input, None])
+    # print("history", history)
+    preprocess(history)
+    return history_show, gr.Textbox(value="", interactive=False)
+def bot(history_show):
+    """Yield the given chatbot history once, unchanged.
+
+    The parameter shadows the module-level ``history_show`` list; only the
+    argument passed by the caller is used.
+    """
+    yield history_show
+def print_like_dislike(x: gr.LikeData):
+    """Print the index, value and liked flag of a like/dislike event."""
+    print(x.index, x.value, x.liked)
+# Page layout and event wiring
+with gr.Blocks() as demo:
+    gr.Markdown("""
+                <h1 style="text-align: center;">COBOL解析</h1>
+                """)  # Set the title; markdown syntax can be used
+    chatbot = gr.Chatbot(
+        [],
+        elem_id="chatbot",
+        bubble_full_width=False,
+        show_copy_button=True,
+        avatar_images=(None, (os.path.join(os.path.dirname(__file__), "1.png"))),
+    )
+    analysis_options = gr.Dropdown(['全体概要 入出力 COPY句 サブルーチン解析', 'データ定義分析', 'IF ELSE END解析', 'TABLE COND ACT END解析', 'コード解析', 'カスタマイズprompt'], label="解析タイプ選択")
+    radio=gr.Radio(["安定出力", "積極出力"], label="ランダム性制御", info="「安定出力」を採用するとモデルはより多くの決定論的な応答を生成します。「積極出力」を採用するとより多くの創造的な応答が生じます。")
+    user_input = gr.Textbox(scale=4,show_label=False,placeholder="user input", container=False,lines=1)  # Input textbox
+    # Use gr.ClearButton to clear the input box and the chatbot's recorded content
+    clear1 = gr.ClearButton([user_input],value="入力コンテンツクリア")
+    clear2 = gr.ClearButton([user_input, chatbot],value="Chatコンテンツクリア")
+    # Changing the radio selection updates the global sampling parameters.
+    radio.change(fn=radio_change, inputs=radio)
+    # Submitting the textbox runs greet (which returns a non-interactive textbox),
+    # then bot passes the chatbot value through.
+    txt_msg = user_input.submit(greet, [chatbot,user_input,analysis_options],[chatbot,user_input], queue=False).then(
+        bot, chatbot, chatbot, api_name="bot_response"
+    )
+    # Re-enable the textbox once the chain above has finished.
+    txt_msg.then(lambda: gr.Textbox(interactive=True), None, [user_input], queue=False)
+    chatbot.like(print_like_dislike, None, None)
+demo.queue()
+if __name__ == "__main__":
+    demo.launch(share=True)

config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "CHATGPT_MODEL":"azure-canada-qiu-20240119",
+    "OPENAI_API_BASE":"https://azure-qiu-canada-east-20240119.openai.azure.com",
+    "OPENAI_API_VERSION":"2023-07-01-preview",
+    "OPENAI_API_KEY":"a00e229fc3414ccc8df341baccdbf1ab",
+    "OPENAI_EMBEDDINGS_MODEL":"azure-embedding-20240124"
+}