File size: 4,857 Bytes
936432e
 
 
 
 
 
 
 
 
 
9a11f29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
936432e
 
9a11f29
 
 
 
 
 
 
 
936432e
 
 
5c894d1
936432e
 
 
 
7753922
936432e
 
 
7e4ef99
 
936432e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a11f29
 
936432e
 
 
9a11f29
936432e
 
9a11f29
 
936432e
 
 
 
 
 
 
 
 
 
 
 
 
9a11f29
936432e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e131578
936432e
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# app.py

import os
import gradio as gr

# Import your modules
from parsers import parse_pcap
from analysis import analyze_calls
from call_flow import create_call_flow_diagram

# We'll adapt llm_utils to load flan-t5-base locally
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

def create_local_pipeline(model_id="google/flan-t5-base"):
    """Build a local text2text-generation pipeline for a Flan-T5 checkpoint.

    Flan-T5 is a seq2seq model, so the 'text2text-generation' task is the
    appropriate pipeline type. The 'flan-t5-base' checkpoint is expected to
    run within ~16GB RAM.

    Args:
        model_id: Hugging Face model identifier to load.

    Returns:
        A ready-to-use transformers pipeline.
    """
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(model_id, device_map="auto")
    seq2seq_tokenizer = AutoTokenizer.from_pretrained(model_id)
    return pipeline(
        "text2text-generation",
        model=seq2seq_model,
        tokenizer=seq2seq_tokenizer,
    )

# Initialize model pipeline once
# Loading at import time means the (potentially slow) model download/load
# happens once at startup rather than on every request.
MODEL_ID = "google/flan-t5-base"
generator = create_local_pipeline(MODEL_ID)

def get_llm_opinion(prompt, generator):
    """Run *prompt* through the local Flan-T5 pipeline and return the text.

    The text2text-generation task handles the seq2seq nature of T5.

    Args:
        prompt: Fully assembled prompt string.
        generator: A transformers text2text-generation pipeline (or any
            callable with a compatible signature).

    Returns:
        The generated text of the first candidate.
    """
    candidates = generator(
        prompt,
        max_length=256,
        do_sample=True,
        temperature=0.5,
    )
    first_candidate = candidates[0]
    return first_candidate["generated_text"]

def process_file(pcap_file):
    """Analyze an uploaded PCAP file and summarize the SIP/RTP calls.

    Called when the user clicks the 'Analyze File' button.

    Args:
        pcap_file: Value from Gradio's File component. Recent Gradio
            versions pass a string path to the uploaded temp file (e.g.
            "/tmp/gradio/xyz/somefile.pcap"); older versions may pass an
            object exposing the path via ``.name`` — both are accepted.

    Returns:
        A ``(result_text, calls_by_id)`` tuple: the textual summary
        (analysis + call flow) and the dict of calls keyed by Call-ID,
        stored in Gradio state so the LLM tab can reuse it.
    """
    if not pcap_file:
        return "No file uploaded.", {}

    # Accept either a plain path string or a tempfile wrapper with .name
    # (older Gradio versions) — TODO confirm which Gradio version is pinned.
    if isinstance(pcap_file, str):
        temp_filename = pcap_file
    else:
        temp_filename = getattr(pcap_file, "name", pcap_file)

    # 1) Parse the PCAP into per-call structures keyed by Call-ID
    calls_by_id = parse_pcap(temp_filename)

    # 2) Analyze the calls
    analysis_result = analyze_calls(calls_by_id)

    # 3) Create a call flow diagram (textual)
    call_flow_text = create_call_flow_diagram(calls_by_id)

    # Combine them into one display string
    result_text = (
        f"=== VoIP Analysis ===\n"
        f"{analysis_result}\n\n"
        f"=== Call Flow Diagram(s) ===\n"
        f"{call_flow_text}"
    )

    # Best-effort cleanup of the temp upload: Gradio may manage/remove the
    # file itself, so a missing file must not discard the finished analysis.
    try:
        os.remove(temp_filename)
    except OSError:
        pass

    return result_text, calls_by_id

def ask_llm_opinion(calls_data, question):
    """Send the call analysis context plus a user question to the local LLM.

    Args:
        calls_data: Dict of calls returned by parse_pcap() (Gradio state).
        question: The user's free-text question.

    Returns:
        The LLM's response, or a guidance message when there is no call
        data yet or the question is blank.
    """
    # Guard clauses: nothing to analyze, or nothing asked.
    if not calls_data:
        return "No call data available. Please upload and analyze a PCAP first."
    if not question.strip():
        return "Please enter a question."

    # Summarize the calls so the model has context to reason about.
    context_lines = ["Below is a brief description of the calls found in the PCAP:"]
    for cid, call in calls_data.items():
        context_lines.append(
            f"- Call-ID: {cid}, from_tag: {call.from_tag}, to_tag: {call.to_tag}"
        )
    calls_context = "\n".join(context_lines) + "\n"

    # Assemble the final T5 prompt.
    prompt = (
        f"{calls_context}\n"
        f"User's question: {question}\n\n"
        "Please provide your expert VoIP analysis or advice."
    )

    return get_llm_opinion(prompt, generator=generator)

def main():
    """Build and launch the two-tab Gradio interface.

    Tab 1 ("PCAP Analysis"): upload a capture file and run the analysis.
    Tab 2 ("LLM Consultation"): ask the local Flan-T5 about the results.
    """
    with gr.Blocks() as app:
        gr.Markdown(
            "# VoIP Analyzer\nUpload a PCAP/PCAPNG file for SIP/RTP analysis."
            " Then consult Flan-T5 for further insights."
        )

        # Parsed call data is shared between the two tabs via Gradio state.
        shared_calls = gr.State({})

        with gr.Tab("PCAP Analysis"):
            upload_box = gr.File(label="Upload a PCAP or PCAPNG file")
            run_button = gr.Button("Analyze File")
            summary_box = gr.Textbox(label="Analysis & Call Flow", lines=20)

            run_button.click(
                fn=process_file,
                inputs=upload_box,
                outputs=[summary_box, shared_calls],
            )

        with gr.Tab("LLM Consultation"):
            query_box = gr.Textbox(label="Ask a question about the call(s)")
            submit_button = gr.Button("Ask LLM")
            answer_box = gr.Textbox(label="LLM Response", lines=10)

            submit_button.click(
                fn=ask_llm_opinion,
                inputs=[shared_calls, query_box],
                outputs=[answer_box],
            )

    # Listen on all interfaces; share=True also creates a public Gradio link.
    app.launch(server_name="0.0.0.0", server_port=7860, share=True)

# Standard script entry point: launch the app only when run directly,
# not when imported as a module.
if __name__ == "__main__":
    main()