Spaces:

CognizantAI
/

IntelAnalyser

Sleeping

App Files Files Community

ashischakraborty committed on Feb 10, 2025

Commit

657d3ba

verified ·

1 Parent(s): 722578b

Update azure_openai.py

Browse files

Files changed (1) hide show

azure_openai.py +348 -348

azure_openai.py CHANGED Viewed

@@ -1,349 +1,349 @@
-import streamlit as st
-import os
-import pandas as pd
-# from langchain.chat_models import AzureChatOpenAI
-from langchain_openai import AzureChatOpenAI
-from langchain_core.output_parsers import StrOutputParser, PydanticOutputParser
-from langchain_core.prompts.chat import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
-from pydantic import BaseModel, Field, validator
-from langchain.output_parsers.enum import EnumOutputParser
-from langchain_core.prompts import PromptTemplate
-from enum import Enum
-os.environ["LANGCHAIN_TRACING_V2"]="true"
-os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
-LANGCHAIN_API_KEY = st.secrets['LANGCHAIN_API_KEY']
-os.environ["LANGCHAIN_PROJECT"]="UC2e2e"
-# LLM Langchain Definition
-OPENAI_API_KEY = st.secrets['OPENAI_API_KEY']
-OPENAI_API_TYPE = "azure"
-OPENAI_API_BASE = "https://davidfearn-gpt4.openai.azure.com"
-OPENAI_API_VERSION = "2024-08-01-preview"
-OPENAI_MODEL = "gpt-4o-mini"
-# Function to read file contents
-def read_file(file):
-    """
-    Reads the content of a text file and returns it as a string.
-    :param file: The file name to read from the 'assets' directory.
-    :return: The content of the file as a string or None if an error occurs.
-    """
-    fp = f"assets/{file}.md"
-    try:
-        with open(fp, 'r', encoding='utf-8') as file:
-            content = file.read()
-        return content
-    except FileNotFoundError:
-        print(f"The file at {fp} was not found.")
-    except IOError:
-        print(f"An error occurred while reading the file at {fp}.")
-    return None
-# Function to generate structured insights
-def process_insight(chunk, topic,source):
-    GSKGlossary = read_file("GSKGlossary")
-    if source== "intl":
-        SystemMessage = read_file("intl_insight_system_message")
-        UserMessage = read_file("intl_insight_user_message")
-    else:
-        SystemMessage = read_file("ext_insight_system_message")
-        UserMessage = read_file("ext_insight_user_message")
-    class Insights(BaseModel):
-        completed: bool = Field(description="This field is used to indicate that you think the number of insights has been completed")
-        insight: str = Field(description="This field is used to return the MECE insight in string format")
-    llm = AzureChatOpenAI(
-        openai_api_version=OPENAI_API_VERSION,
-        openai_api_key=OPENAI_API_KEY,
-        azure_endpoint=OPENAI_API_BASE,
-        openai_api_type=OPENAI_API_TYPE,
-        deployment_name=OPENAI_MODEL,
-        temperature=0,
-    )
-    system_message_template = SystemMessagePromptTemplate.from_template(SystemMessage)
-    structured_llm = llm.with_structured_output(Insights)
-    prompt = ChatPromptTemplate.from_messages([system_message_template, UserMessage])
-    chain = prompt | structured_llm
-    new_insights = []
-    insights_data = []
-    while True:
-        # Invoke the LLM with the current chunk and existing insights
-        counter = 5 - len(new_insights)
-        new_insight_response = chain.invoke({"chunk": chunk, "existing_insights": new_insights, "counter": counter, "GSKGlossary": GSKGlossary, "topic":topic})
-        classification = selectClass(new_insight_response.insight)
-        # Append the new insight to the list
-        new_insights.append(new_insight_response.insight)
-        insights_data.append({
-            # "completed": new_insight_response.completed,
-            "classification": classification,
-            "insight": new_insight_response.insight,
-            "chunk": chunk
-        })
-        # Check if "completed" is True or the list of "new_insights" is >= 3
-        if new_insight_response.completed and len(new_insights) >= 3:
-            return pd.DataFrame(insights_data)
-        # If the list of "new_insights" reaches 5, return the list
-        if len(new_insights) == 5:
-            return pd.DataFrame(insights_data)
-def selectClass(insight):
-    classification_system_message = read_file("classification_system_message")
-    classification_user_message = read_file("classification_user_message")
-    class InsightClassification(Enum):
-        IMPACT = "impact"
-        CONSULTATION = "consultation"
-        AWARENESS = "awareness"
-    llm = AzureChatOpenAI(
-        openai_api_version=OPENAI_API_VERSION,
-        openai_api_key=OPENAI_API_KEY,
-        azure_endpoint=OPENAI_API_BASE,
-        openai_api_type=OPENAI_API_TYPE,
-        deployment_name=OPENAI_MODEL,
-        temperature=0,
-    )
-    parser = EnumOutputParser(enum=InsightClassification)
-    system_message_template = SystemMessagePromptTemplate.from_template(classification_system_message)
-# structured_llm = llm.with_structured_output(Insights)
-    prompt = ChatPromptTemplate.from_messages([system_message_template, classification_user_message]).partial(options=parser.get_format_instructions())
-    chain = prompt | llm | parser
-    result = chain.invoke({"insight": insight})
-    return result.value
-def process_chunks(chunk, topic,source):
-    """
-    Processes chunks from a specific dataframe column, invokes the get_structured function for each chunk,
-    and combines the resulting dataframes into one dataframe.
-    :param df: The dataframe containing chunks.
-    :param temp: Temperature parameter for the LLM.
-    :param SystemMessage: System message template.
-    :param UserMessage: User message template.
-    :param completedMessage: Completion message description.
-    :param insightMessage: Insight message description.
-    :param chunk_column: The name of the column containing text chunks to process.
-    :return: A combined dataframe of insights from all chunks.
-    """
-    all_insights = []
-    for chunk in chunk["ChunkText"]:
-        insights_df = process_insight(chunk, topic,source)
-        all_insights.append(insights_df)
-    return pd.concat(all_insights, ignore_index=True)
-def evaluation_llm(chunk, topic , source):
-    GSKGlossary = read_file("GSKGlossary")
-    if source == "intl":
-        SystemMessage = read_file("intl_eval_system_message")
-        UserMessage = read_file("intl_eval_user_message")
-    else:
-        SystemMessage = read_file("ext_eval_system_message")
-        UserMessage = read_file("ext_eval_user_message")
-    class Evaluate(BaseModel):
-        decision: bool = Field(description="True: The content of the document relates to the topic.False: The content of the document does not relate to the topic.")
-        justification: str = Field(description="Please justify your decision in a logical and structured way.")
-    llm = AzureChatOpenAI(
-        openai_api_version=OPENAI_API_VERSION,
-        openai_api_key=OPENAI_API_KEY,
-        azure_endpoint=OPENAI_API_BASE,
-        openai_api_type=OPENAI_API_TYPE,
-        deployment_name=OPENAI_MODEL,
-        temperature=0,
-    )
-    system_message_template = SystemMessagePromptTemplate.from_template(SystemMessage)
-    structured_llm = llm.with_structured_output(Evaluate)
-    # Create a chat prompt template combining system and human messages
-    prompt = ChatPromptTemplate.from_messages([system_message_template, UserMessage])
-    chain = prompt | structured_llm
-    return chain.invoke({
-        "chunk": chunk,
-        "topic": topic,
-        "GSKGlossary": GSKGlossary
-    })
-def evaluation_process(df_chunks, topic,source):
-    """
-    Iterates over chunks in the DataFrame and processes them using `get_structured`.
-    :param df_chunks: DataFrame containing chunks.
-    :param systemMessage: System message for evaluation.
-    :param userMessage: User message template for evaluation.
-    :param temp: Temperature setting for the model.
-    :param decisionMessage: Description for decision field.
-    :param justificationMessage: Description for justification field.
-    :return: Updated DataFrame with decision and justification columns and consensus value.
-    """
-    decisions = []
-    justifications = []
-    # Avoid re-inserting columns if they already exist
-    if "Decision" in df_chunks.columns:
-        df_chunks = df_chunks.drop(columns=["Decision", "Justification"])
-    for _, chunk in df_chunks.iterrows():
-        result = evaluation_llm(chunk['ChunkText'], topic,source)
-        decisions.append("True" if result.decision else "False")  # Convert bool to string
-        justifications.append(result.justification)
-    # Add new columns to the DataFrame
-    df_chunks.insert(0, "Decision", decisions)
-    df_chunks.insert(1, "Justification", justifications)
-    # Count all True/False values for consensus and get most frequent value
-    consensus_count = df_chunks["Decision"].value_counts()
-    consensus_value = consensus_count.idxmax()  # Most frequently occurring value
-    return df_chunks, consensus_value, consensus_count
-def process_compare(insight_df, sopChunk_df, topic):
-    GSKGlossary = read_file("GSKGlossary")
-    SystemMessage = read_file("compare_system_message")
-    UserMessage = read_file("compare_user_message")
-    # Define the structured output model
-    class Compare(BaseModel):
-        review: bool = Field(description="This field is used to indicate whether a review is needed")
-        justification: str = Field(description="This field is used to justify why a review is needed")
-    # Initialize the LLM
-    llm = AzureChatOpenAI(
-        openai_api_version=OPENAI_API_VERSION,
-        openai_api_key=OPENAI_API_KEY,
-        azure_endpoint=OPENAI_API_BASE,
-        openai_api_type=OPENAI_API_TYPE,
-        deployment_name=OPENAI_MODEL,
-        temperature=0,
-    )
-    # Create the structured output and prompt chain
-    system_message_template = SystemMessagePromptTemplate.from_template(SystemMessage)
-    structured_llm = llm.with_structured_output(Compare)
-    prompt = ChatPromptTemplate.from_messages([system_message_template, UserMessage])
-    chain = prompt | structured_llm
-    compare_data = []
-    # Iterate over sopChunk_df and insight_df to process "ChunkText" and "insight"
-    for sopChunk_index, sopChunk_row in sopChunk_df.iterrows():
-        sop_chunk_text = sopChunk_row["ChunkText"]  # Extract the ChunkText column
-        for insight_index, insight_row in insight_df.iterrows():
-            insight_text = insight_row["insight"]  # Extract the insight column
-            # Invoke the LLM with the extracted data
-            compare_response = chain.invoke({
-                "sopChunk": sop_chunk_text,
-                "insight": insight_text,
-                "topic": topic,
-                "GSKGlossary": GSKGlossary
-            })
-            # Append the response to insights_data
-            compare_data.append({
-                "ReviewNeeded": compare_response.review,
-                "Justification": compare_response.justification,
-                "SOP": sop_chunk_text,
-                "Insight": insight_text
-            })
-    # Return the insights as a single DataFrame
-    print(compare_data)
-    return pd.DataFrame(compare_data)
-def risk_score_process(compare_df, topic):
-    GSKGlossary = read_file("GSKGlossary")
-    SystemMessage = read_file("risk_scoring_system_message")
-    UserMessage = read_file("risk_scoring_user_message")
-# Define the Enum for predefined options
-    class RiskClassification(str, Enum):
-        HIGH = "high"
-        MEDIUM = "medium"
-        LOW = "low"
-    # Define the Pydantic model for the structured output
-    class Risk(BaseModel):
-        risk_level: RiskClassification = Field(
-            description="The selected classification option."
-        )
-        justification: str = Field(
-            description="Justify the reason for choosing this risk classification."
-        )
-        advice: str = Field(
-            description="Suggestions for changes that could be made to the standard operating procedure to mitigat the risk."
-        )
-    llm = AzureChatOpenAI(
-        openai_api_version=OPENAI_API_VERSION,
-        openai_api_key=OPENAI_API_KEY,
-        azure_endpoint=OPENAI_API_BASE,
-        openai_api_type=OPENAI_API_TYPE,
-        deployment_name=OPENAI_MODEL,
-        temperature=0,
-    )
-    system_message_template = SystemMessagePromptTemplate.from_template(SystemMessage)
-    structured_llm = llm.with_structured_output(Risk)
-    prompt = ChatPromptTemplate.from_messages([system_message_template, UserMessage])
-    chain = prompt | structured_llm
-    risk_data = []
-    # Iterate over sopChunk_df and insight_df to process "ChunkText" and "insight"
-    for index, row in compare_df.iterrows():
-            # Invoke the LLM with the extracted data
-            risk_response = chain.invoke({
-                "comparison": row['Justification'],
-                "insight": row['Insight'],
-                "SOPchunk":row['SOP'],
-                "topic": topic
-            })
-            # Append the response to insights_data
-            risk_data.append({
-                "RiskLevel": risk_response.risk_level,
-                "Justification": risk_response.justification,
-                "advice": risk_response.advice,
-                "comparison": row['Justification'],
-                "insight": row['Insight'],
-                "SOPchunk":row['SOP']
-            })
-    # Return the insights as a single DataFrame
     return pd.DataFrame(risk_data)

+import streamlit as st
+import os
+import pandas as pd
+# from langchain.chat_models import AzureChatOpenAI
+from langchain_openai import AzureChatOpenAI
+from langchain_core.output_parsers import StrOutputParser, PydanticOutputParser
+from langchain_core.prompts.chat import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
+from pydantic import BaseModel, Field, validator
+from langchain.output_parsers.enum import EnumOutputParser
+from langchain_core.prompts import PromptTemplate
+from enum import Enum
+#os.environ["LANGCHAIN_TRACING_V2"]="true"
+#os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
+#LANGCHAIN_API_KEY = st.secrets['LANGCHAIN_API_KEY']
+#os.environ["LANGCHAIN_PROJECT"]="UC2e2e"
+# LLM Langchain Definition
+OPENAI_API_KEY = st.secrets['OPENAI_API_KEY']
+OPENAI_API_TYPE = "azure"
+OPENAI_API_BASE = "https://davidfearn-gpt4.openai.azure.com"
+OPENAI_API_VERSION = "2024-08-01-preview"
+OPENAI_MODEL = "gpt-4o-mini"
+# Function to read file contents
+def read_file(file):
+    """
+    Reads the content of the markdown file 'assets/<file>.md' and returns it as a string.
+    :param file: The base name (without the '.md' extension) of the file in the 'assets' directory.
+    :return: The content of the file as a string, or None if the file is missing or cannot be read.
+    """
+    fp = f"assets/{file}.md"
+    try:
+        with open(fp, 'r', encoding='utf-8') as file:
+            content = file.read()
+        return content
+    except FileNotFoundError:
+        print(f"The file at {fp} was not found.")
+    except IOError:
+        print(f"An error occurred while reading the file at {fp}.")
+    return None
+# Function to generate structured insights
+def process_insight(chunk, topic,source):
+    GSKGlossary = read_file("GSKGlossary")
+    if source== "intl":
+        SystemMessage = read_file("intl_insight_system_message")
+        UserMessage = read_file("intl_insight_user_message")
+    else:
+        SystemMessage = read_file("ext_insight_system_message")
+        UserMessage = read_file("ext_insight_user_message")
+    class Insights(BaseModel):
+        completed: bool = Field(description="This field is used to indicate that you think the number of insights has been completed")
+        insight: str = Field(description="This field is used to return the MECE insight in string format")
+    llm = AzureChatOpenAI(
+        openai_api_version=OPENAI_API_VERSION,
+        openai_api_key=OPENAI_API_KEY,
+        azure_endpoint=OPENAI_API_BASE,
+        openai_api_type=OPENAI_API_TYPE,
+        deployment_name=OPENAI_MODEL,
+        temperature=0,
+    )
+    system_message_template = SystemMessagePromptTemplate.from_template(SystemMessage)
+    structured_llm = llm.with_structured_output(Insights)
+    prompt = ChatPromptTemplate.from_messages([system_message_template, UserMessage])
+    chain = prompt | structured_llm
+    new_insights = []
+    insights_data = []
+    while True:
+        # Invoke the LLM with the current chunk and existing insights
+        counter = 5 - len(new_insights)
+        new_insight_response = chain.invoke({"chunk": chunk, "existing_insights": new_insights, "counter": counter, "GSKGlossary": GSKGlossary, "topic":topic})
+        classification = selectClass(new_insight_response.insight)
+        # Append the new insight to the list
+        new_insights.append(new_insight_response.insight)
+        insights_data.append({
+            # "completed": new_insight_response.completed,
+            "classification": classification,
+            "insight": new_insight_response.insight,
+            "chunk": chunk
+        })
+        # Stop once the model reports "completed" and at least 3 insights have been collected
+        if new_insight_response.completed and len(new_insights) >= 3:
+            return pd.DataFrame(insights_data)
+        # If the list of "new_insights" reaches 5, return the collected insights as a DataFrame
+        if len(new_insights) == 5:
+            return pd.DataFrame(insights_data)
+def selectClass(insight):
+    classification_system_message = read_file("classification_system_message")
+    classification_user_message = read_file("classification_user_message")
+    class InsightClassification(Enum):
+        IMPACT = "impact"
+        CONSULTATION = "consultation"
+        AWARENESS = "awareness"
+    llm = AzureChatOpenAI(
+        openai_api_version=OPENAI_API_VERSION,
+        openai_api_key=OPENAI_API_KEY,
+        azure_endpoint=OPENAI_API_BASE,
+        openai_api_type=OPENAI_API_TYPE,
+        deployment_name=OPENAI_MODEL,
+        temperature=0,
+    )
+    parser = EnumOutputParser(enum=InsightClassification)
+    system_message_template = SystemMessagePromptTemplate.from_template(classification_system_message)
+# structured_llm = llm.with_structured_output(Insights)
+    prompt = ChatPromptTemplate.from_messages([system_message_template, classification_user_message]).partial(options=parser.get_format_instructions())
+    chain = prompt | llm | parser
+    result = chain.invoke({"insight": insight})
+    return result.value
+def process_chunks(chunk, topic,source):
+    """
+    Invokes process_insight for each entry of chunk["ChunkText"] and combines the
+    resulting dataframes into one dataframe.
+    :param chunk: Object indexed by "ChunkText" whose entries are the text chunks to process
+        (presumably a dataframe of chunks; confirm against the caller).
+    :param topic: Topic value forwarded unchanged to process_insight.
+    :param source: Source value forwarded unchanged to process_insight, where "intl" selects
+        the intl_* prompt files and any other value selects the ext_* prompt files.
+    :return: A combined dataframe of insights from all chunks.
+    """
+    all_insights = []
+    for chunk in chunk["ChunkText"]:
+        insights_df = process_insight(chunk, topic,source)
+        all_insights.append(insights_df)
+    return pd.concat(all_insights, ignore_index=True)
+def evaluation_llm(chunk, topic , source):
+    GSKGlossary = read_file("GSKGlossary")
+    if source == "intl":
+        SystemMessage = read_file("intl_eval_system_message")
+        UserMessage = read_file("intl_eval_user_message")
+    else:
+        SystemMessage = read_file("ext_eval_system_message")
+        UserMessage = read_file("ext_eval_user_message")
+    class Evaluate(BaseModel):
+        decision: bool = Field(description="True: The content of the document relates to the topic.False: The content of the document does not relate to the topic.")
+        justification: str = Field(description="Please justify your decision in a logical and structured way.")
+    llm = AzureChatOpenAI(
+        openai_api_version=OPENAI_API_VERSION,
+        openai_api_key=OPENAI_API_KEY,
+        azure_endpoint=OPENAI_API_BASE,
+        openai_api_type=OPENAI_API_TYPE,
+        deployment_name=OPENAI_MODEL,
+        temperature=0,
+    )
+    system_message_template = SystemMessagePromptTemplate.from_template(SystemMessage)
+    structured_llm = llm.with_structured_output(Evaluate)
+    # Create a chat prompt template combining system and human messages
+    prompt = ChatPromptTemplate.from_messages([system_message_template, UserMessage])
+    chain = prompt | structured_llm
+    return chain.invoke({
+        "chunk": chunk,
+        "topic": topic,
+        "GSKGlossary": GSKGlossary
+    })
+def evaluation_process(df_chunks, topic,source):
+    """
+    Iterates over the rows of the DataFrame and evaluates each row's "ChunkText" using `evaluation_llm`.
+    :param df_chunks: DataFrame containing a "ChunkText" column.
+    :param topic: Topic value passed to `evaluation_llm` for every chunk.
+    :param source: Source value passed to `evaluation_llm` for every chunk.
+    :return: A tuple of (the DataFrame with "Decision" and "Justification" inserted as its first two columns,
+        the most frequent "Decision" value, the value counts of the "Decision" column).
+        Decisions are stored as the strings "True" and "False".
+    """
+    decisions = []
+    justifications = []
+    # Avoid re-inserting columns if they already exist
+    if "Decision" in df_chunks.columns:
+        df_chunks = df_chunks.drop(columns=["Decision", "Justification"])
+    for _, chunk in df_chunks.iterrows():
+        result = evaluation_llm(chunk['ChunkText'], topic,source)
+        decisions.append("True" if result.decision else "False")  # Convert bool to string
+        justifications.append(result.justification)
+    # Add new columns to the DataFrame
+    df_chunks.insert(0, "Decision", decisions)
+    df_chunks.insert(1, "Justification", justifications)
+    # Count all True/False values for consensus and get most frequent value
+    consensus_count = df_chunks["Decision"].value_counts()
+    consensus_value = consensus_count.idxmax()  # Most frequently occurring value
+    return df_chunks, consensus_value, consensus_count
+def process_compare(insight_df, sopChunk_df, topic):
+    GSKGlossary = read_file("GSKGlossary")
+    SystemMessage = read_file("compare_system_message")
+    UserMessage = read_file("compare_user_message")
+    # Define the structured output model
+    class Compare(BaseModel):
+        review: bool = Field(description="This field is used to indicate whether a review is needed")
+        justification: str = Field(description="This field is used to justify why a review is needed")
+    # Initialize the LLM
+    llm = AzureChatOpenAI(
+        openai_api_version=OPENAI_API_VERSION,
+        openai_api_key=OPENAI_API_KEY,
+        azure_endpoint=OPENAI_API_BASE,
+        openai_api_type=OPENAI_API_TYPE,
+        deployment_name=OPENAI_MODEL,
+        temperature=0,
+    )
+    # Create the structured output and prompt chain
+    system_message_template = SystemMessagePromptTemplate.from_template(SystemMessage)
+    structured_llm = llm.with_structured_output(Compare)
+    prompt = ChatPromptTemplate.from_messages([system_message_template, UserMessage])
+    chain = prompt | structured_llm
+    compare_data = []
+    # Iterate over sopChunk_df and insight_df to process "ChunkText" and "insight"
+    for sopChunk_index, sopChunk_row in sopChunk_df.iterrows():
+        sop_chunk_text = sopChunk_row["ChunkText"]  # Extract the ChunkText column
+        for insight_index, insight_row in insight_df.iterrows():
+            insight_text = insight_row["insight"]  # Extract the insight column
+            # Invoke the LLM with the extracted data
+            compare_response = chain.invoke({
+                "sopChunk": sop_chunk_text,
+                "insight": insight_text,
+                "topic": topic,
+                "GSKGlossary": GSKGlossary
+            })
+            # Append the response to compare_data
+            compare_data.append({
+                "ReviewNeeded": compare_response.review,
+                "Justification": compare_response.justification,
+                "SOP": sop_chunk_text,
+                "Insight": insight_text
+            })
+    # Return the comparison results as a single DataFrame
+    print(compare_data)
+    return pd.DataFrame(compare_data)
+def risk_score_process(compare_df, topic):
+    GSKGlossary = read_file("GSKGlossary")
+    SystemMessage = read_file("risk_scoring_system_message")
+    UserMessage = read_file("risk_scoring_user_message")
+# Define the Enum for predefined options
+    class RiskClassification(str, Enum):
+        HIGH = "high"
+        MEDIUM = "medium"
+        LOW = "low"
+    # Define the Pydantic model for the structured output
+    class Risk(BaseModel):
+        risk_level: RiskClassification = Field(
+            description="The selected classification option."
+        )
+        justification: str = Field(
+            description="Justify the reason for choosing this risk classification."
+        )
+        advice: str = Field(
+            description="Suggestions for changes that could be made to the standard operating procedure to mitigat the risk."
+        )
+    llm = AzureChatOpenAI(
+        openai_api_version=OPENAI_API_VERSION,
+        openai_api_key=OPENAI_API_KEY,
+        azure_endpoint=OPENAI_API_BASE,
+        openai_api_type=OPENAI_API_TYPE,
+        deployment_name=OPENAI_MODEL,
+        temperature=0,
+    )
+    system_message_template = SystemMessagePromptTemplate.from_template(SystemMessage)
+    structured_llm = llm.with_structured_output(Risk)
+    prompt = ChatPromptTemplate.from_messages([system_message_template, UserMessage])
+    chain = prompt | structured_llm
+    risk_data = []
+    # Iterate over the rows of compare_df to score each "Justification" / "Insight" / "SOP" triple
+    for index, row in compare_df.iterrows():
+            # Invoke the LLM with the extracted data
+            risk_response = chain.invoke({
+                "comparison": row['Justification'],
+                "insight": row['Insight'],
+                "SOPchunk":row['SOP'],
+                "topic": topic
+            })
+            # Append the response to risk_data
+            risk_data.append({
+                "RiskLevel": risk_response.risk_level,
+                "Justification": risk_response.justification,
+                "advice": risk_response.advice,
+                "comparison": row['Justification'],
+                "insight": row['Insight'],
+                "SOPchunk":row['SOP']
+            })
+    # Return the risk scoring results as a single DataFrame
     return pd.DataFrame(risk_data)