# Spaces: wanggenfu/classifyEmail (Sleeping)
# File size: 10,855 Bytes

import gradio as gr
import os
import json
from typing import TypedDict, List, Dict, Any, Optional
from langgraph.graph import StateGraph, START, END
# from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, AIMessage
# from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from huggingface_hub import InferenceClient

class EmailState(TypedDict):
    """State dictionary used as the schema of the email-processing graph."""

    # The email being processed
    email: Dict[str, Any]  # holds the subject, sender, body, etc.

    # Analysis and decisions
    is_spam: Optional[bool]

    spam_reason: Optional[str]

    email_category: Optional[str]

    # Response generation
    draft_response: Optional[str]

    # Processing metadata
    messages: List[Dict[str, Any]]  # tracks the conversation with the LLM for analysis

# Shared LLM client: a Hugging Face InferenceClient pinned to the Qwen coder
# model. The access token is read from the "hf_token" environment variable
# (None when it is not set) and the client is created with timeout=30.
hf_token = os.environ.get("hf_token")
model = InferenceClient(model="Qwen/Qwen2.5-Coder-32B-Instruct", token=hf_token, timeout=30)

def query_llm(prompt: str):
    """Send ``prompt`` as a single user turn and return the first reply message.

    The request goes through the module-level ``model`` client with
    ``temperature=0``. The returned object is ``choices[0].message`` of the
    completion, so callers read the text from its ``content`` attribute.
    """
    user_turn = {"role": "user", "content": prompt}
    completion = model.chat.completions.create(messages=[user_turn], temperature=0)
    first_choice = completion.choices[0]
    return first_choice.message

def read_email(state: EmailState):
    """Log which email Alfred is about to process.

    Prints the sender and subject of ``state["email"]`` and returns an empty
    update, so the state is left untouched by this step.
    """
    incoming = state["email"]
    sender, subject = incoming["sender"], incoming["subject"]

    # Hook point for any preliminary preprocessing of the message.
    print(f"Alfred is processing an email from {sender} with subject: {subject}")

    # Nothing in the state needs to change here.
    return {}


def _parse_classification(response_text: str) -> tuple[bool, Optional[str], Optional[str]]:
    """Pull the spam verdict, spam reason and category out of the LLM reply.

    Returns an ``(is_spam, spam_reason, email_category)`` tuple. ``spam_reason``
    is only filled in for spam and ``email_category`` only for legitimate
    mail; either stays ``None`` when the reply lacks the expected text.
    """
    import re  # local import: the file's import block does not provide ``re``

    # Accept "yes" as well as "true", tolerate extra whitespace or markdown
    # asterisks after the label, and ignore case.
    verdict = re.search(r"is spam:[\s*]*(?:yes|true)", response_text, re.IGNORECASE)
    is_spam = verdict is not None

    spam_reason = None
    email_category = None
    if is_spam:
        # Split the reply as written (not a lower-cased copy) so the reason
        # keeps its original casing.
        parts = re.split(r"reason for spam:", response_text, flags=re.IGNORECASE)
        if len(parts) > 1:
            spam_reason = parts[1].strip()
    else:
        categories = ("inquiry", "complaint", "thank you", "request", "information")
        lowered = response_text.lower()
        # Check the "Email category:" line first so that a category word that
        # merely appears elsewhere in the reply cannot win; fall back to the
        # whole reply when that line is missing or names no known category.
        category_line = re.search(r"email category:(.*)", lowered)
        haystacks = [category_line.group(1), lowered] if category_line else [lowered]
        for haystack in haystacks:
            email_category = next((c for c in categories if c in haystack), None)
            if email_category is not None:
                break

    return is_spam, spam_reason, email_category


def classify_email(state: EmailState) -> Dict[str, Any]:
    """Alfred uses an LLM to determine if the email is spam or legitimate.

    Builds a classification prompt from ``state["email"]`` (``sender``,
    ``subject`` and ``body``), sends it through ``query_llm`` and parses the
    reply.

    NOTE: the name ``classify_email`` is rebound by later definitions in this
    file; the graph keeps using this function because it is registered as a
    node before that rebinding happens.

    Returns:
        A partial state update with ``is_spam``, ``spam_reason``,
        ``email_category`` and ``messages`` (the previous messages plus this
        prompt/reply pair).
    """
    email = state["email"]

    # Prompt for the LLM; the parser relies on the labels of the answer format.
    prompt = f"""
    As Alfred the butler, analyze this email and determine if it is spam or legitimate.

    Email:
    From: {email['sender']}
    Subject: {email['subject']}
    Body: {email['body']}

    First, determine if this email is spam. If it is spam, explain why.
    If it is legitimate, categorize it (inquiry, complaint, thank you, etc.).

    Please answer strictly in the following format:
    Email category: inquiry, complaint, thank you, spam, etc.
    Is spam:
    Reason for spam:
    """

    # Call the LLM
    response = query_llm(prompt)

    # Guard against a reply without text so parsing never fails on None.
    response_text = response.content or ""
    is_spam, spam_reason, email_category = _parse_classification(response_text)

    # Extend the conversation trace without mutating the list held in the state.
    new_messages = state.get("messages", []) + [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": response.content},
    ]

    # State update returned to the graph
    return {
        "is_spam": is_spam,
        "spam_reason": spam_reason,
        "email_category": email_category,
        "messages": new_messages,
    }

def route_email(state: EmailState) -> str:
    """Pick the outgoing branch name from the spam verdict.

    Returns ``"spam"`` when ``state["is_spam"]`` is truthy and
    ``"legitimate"`` otherwise (including when it is ``None``).
    """
    return "spam" if state["is_spam"] else "legitimate"


def handle_spam(state: EmailState):
    """Alfred discards spam email with a note.

    Prints the recorded spam reason (``none`` when no reason is available)
    followed by a notice that the email was moved to the spam folder.

    Returns:
        An empty dict: this node makes no state changes.
    """
    # ``spam_reason`` can be present with the value None, in which case a
    # ``.get`` default would not apply, so ``or`` supplies the fallback.
    # The lookup is also kept out of the f-string: reusing double quotes inside
    # a double-quoted f-string only parses on Python 3.12 and later.
    reason = state.get("spam_reason") or "none"
    print(f"Alfred has marked the email as spam. Reason: {reason}")
    print("The email has been moved to the spam folder.")

    # Processing of this email is finished.
    return {}

def draft_response(state: EmailState):
    """Ask the LLM for a first-pass reply to a legitimate email.

    Reads ``state["email"]`` and ``state["email_category"]``, sends a drafting
    prompt through ``query_llm`` and returns a state update holding the reply
    text under ``draft_response`` plus ``messages`` extended with this
    prompt/reply pair.
    """
    message = state["email"]
    # Use a generic label when classification produced no category.
    label = state["email_category"] or "general"

    # Prompt for the LLM
    prompt = f"""
    As Alfred the butler, draft a polite preliminary response to this email.

    Email:
    From: {message['sender']}
    Subject: {message['subject']}
    Body: {message['body']}

    This email has been categorized as: {label}

    Draft a brief, professional response that Mr. Hugg can review and personalize before sending.
    """

    # Call the LLM
    reply = query_llm(prompt)

    # Append this exchange to the trace; ``+`` builds a new list.
    earlier = state.get("messages", [])
    user_turn = {"role": "user", "content": prompt}
    assistant_turn = {"role": "assistant", "content": reply.content}
    history = earlier + [user_turn, assistant_turn]

    # State update returned to the graph
    return {"draft_response": reply.content, "messages": history}

def notify_mr_hugg(state: EmailState):
    """Print a summary of the email and the prepared draft for Mr. Hugg.

    Reads the sender and subject from ``state["email"]`` together with
    ``state["email_category"]`` and ``state["draft_response"]``. Returns an
    empty update because the state is not modified.
    """
    heavy_rule = "=" * 50
    light_rule = "-" * 50
    mail = state["email"]

    print(f"\n{heavy_rule}")
    print(f"Sir, you've received an email from {mail['sender']}.")
    print(f"Subject: {mail['subject']}")
    print(f"Category: {state['email_category']}")
    print("\nI've prepared a draft response for your review:")
    print(light_rule)
    print(state["draft_response"])
    print(f"{heavy_rule}\n")

    # Processing of this email is finished.
    return {}



# Create the graph
email_graph = StateGraph(EmailState)

# Add the nodes.
# NOTE: ``classify_email`` here is the graph-node function defined above. The
# name is rebound by later definitions in this file, but the function object
# has already been passed to ``add_node`` by then.
email_graph.add_node("read_email", read_email)
email_graph.add_node("classify_email", classify_email)
email_graph.add_node("handle_spam", handle_spam)
email_graph.add_node("draft_response", draft_response)
email_graph.add_node("notify_mr_hugg", notify_mr_hugg)

# Add the edges - define the flow
email_graph.add_edge(START, "read_email")
email_graph.add_edge("read_email", "classify_email")

# Add the conditional branch out of classify_email; the string returned by
# route_email is looked up in the mapping below to choose the next node.
email_graph.add_conditional_edges(
    "classify_email",
    route_email,
    {
        "spam": "handle_spam",
        "legitimate": "draft_response"
    }
)

# Add the final edges
email_graph.add_edge("handle_spam", END)
email_graph.add_edge("draft_response", "notify_mr_hugg")
email_graph.add_edge("notify_mr_hugg", END)

# Compile the graph
compiled_graph = email_graph.compile()


# Sample of a legitimate email
legitimate_email = dict(
    sender="john.smith@example.com",
    subject="Question about your services",
    body=(
        "Dear Mr. Hugg, I was referred to you by a colleague and I'm interested "
        "in learning more about your consulting services. Could we schedule a "
        "call next week? Best regards, John Smith"
    ),
)

# Sample of a spam email
spam_email = dict(
    sender="winner@lottery-intl.com",
    subject="YOU HAVE WON $5,000,000!!!",
    body=(
        "CONGRATULATIONS! You have been selected as the winner of our "
        "international lottery! To claim your $5,000,000 prize, please send us "
        "your bank details and a processing fee of $100."
    ),
)

def classify_email(json_input):
    """Classify an email supplied as a JSON string.

    NOTE: a later ``def classify_email(sender, subject, body)`` in this file
    rebinds the name, so this variant cannot be reached by name once the
    module has finished loading.

    Args:
        json_input: JSON text that decodes to the email mapping handed to the
            graph as ``email``.

    Returns:
        A two-line string: ``is_spam:<value>`` then ``spam_reason:<value>``,
        taken from the graph result.

    Raises:
        json.JSONDecodeError: if ``json_input`` is not valid JSON.
    """
    email = json.loads(json_input)
    result = compiled_graph.invoke({
        "email": email,
        "is_spam": None,
        "spam_reason": None,
        "email_category": None,
        "draft_response": None,
        "messages": []
    })
    # Single-quoted keys inside the double-quoted f-string: reusing the same
    # quote character only parses on Python 3.12 and later.
    return f"is_spam:{result.get('is_spam')}\nspam_reason:{result.get('spam_reason')}"


# ===================== Core handler (email analysis) =====================
def classify_email(sender: str, subject: str, body: str) -> tuple[str, str]:
    """Classify one email from the form fields for the two result boxes.

    Args:
        sender: Sender address typed into the form.
        subject: Subject line typed into the form.
        body: Message text typed into the form.

    Returns:
        ``(is_spam, reason)`` as strings: ``"true"`` or ``"false"``, and the
        spam reason (an empty string when there is none). If any field is
        blank, the email is reported as spam without running the graph.
    """
    # Reject blank (or missing) fields up front. ``or ""`` keeps a None value
    # from crashing on ``.strip()``.
    if any(not (value or "").strip() for value in (sender, subject, body)):
        return "true", "sender、subject、body为空"

    email = {
        "sender": sender,
        "subject": subject,
        "body": body,
    }

    result = compiled_graph.invoke({
        "email": email,
        "is_spam": None,
        "spam_reason": None,
        "email_category": None,
        "draft_response": None,
        "messages": [],
    })

    # Return strings on every path, matching the annotation and the "true"
    # produced by the validation branch above.
    is_spam = "true" if result.get("is_spam") else "false"
    return is_spam, result.get("spam_reason") or ""


# ===================== Gradio form UI =====================
with gr.Blocks(title="邮件分析表单", theme=gr.themes.Soft()) as demo:
    # Title and instructions
    gr.Markdown("### 📧 邮件垃圾信息分析表单")
    gr.Markdown("请输入以下3项信息，提交后自动分析邮件是否为垃圾邮件")

    # Form area: three text inputs
    with gr.Row():  # row layout; optionally switch to gr.Column() for a vertical layout
        with gr.Column(scale=1):
            sender_input = gr.Textbox(
                label="1. 发件人邮箱地址",
                placeholder="john.smith@gmail.com",
                lines=1,
                max_lines=1
            )
        with gr.Column(scale=2):
            subject_input = gr.Textbox(
                label="2. 邮件主题",
                placeholder="预约会议",
                lines=1,
                max_lines=1
            )

    # Third text box: multi-line input for the email body
    content_input = gr.Textbox(
        label="3. 邮件内容",
        placeholder="我想预约下周二的咨询会议",
        lines=5,
        max_lines=10
    )

    # Submit button
    submit_btn = gr.Button("📤 提交分析", variant="primary", size="lg")

    # Divider (disabled)
    # gr.Divider()

    # Result area (structured output)
    gr.Markdown("### 📝 分析结果")
    with gr.Row():
        is_spam_output = gr.Textbox(label="是否垃圾邮件", interactive=False)
        reason_output = gr.Textbox(label="判定原因", lines=3, interactive=False)

    # Wire the submit button to the handler
    submit_btn.click(
        fn=classify_email,
        inputs=[sender_input, subject_input, content_input],  # the three input boxes
        outputs=[is_spam_output, reason_output]  # the two output boxes
    )

    # Reset button (optional)
    reset_btn = gr.Button("🔄 清空输入")
    reset_btn.click(
        # Return exactly one empty string per component listed in ``outputs``
        # (five), so the handler's value count matches the output count.
        fn=lambda: ("", "", "", "", ""),
        inputs=[],
        outputs=[sender_input, subject_input, content_input, is_spam_output, reason_output]
    )

# ===================== Launch the UI =====================
if __name__ == "__main__":
    # launch() is called with its defaults here. Per the original note,
    # server_name="0.0.0.0" allows LAN access and server_port sets a custom port.
    demo.launch()

# demo = gr.Interface(fn=classify_email, inputs="text", outputs="text")
# demo.launch()