Spaces:

fabioantonini
/

VoIPAnalyzer

Sleeping

App Files Files Community

fabioantonini committed on Mar 21, 2025

Commit

936432e

verified ·

1 Parent(s): 9973426

Upload 5 files

Browse files

first implementation

Files changed (5) hide show

analysis.py +101 -0
app.py +120 -0
call_flow.py +83 -0
parsers.py +158 -0
requirements.txt +12 -0

analysis.py ADDED Viewed

	@@ -0,0 +1,101 @@

+# analysis.py
+from datetime import datetime
+def analyze_calls(calls_by_id):
+    """
+    Inspect each Call object in 'calls_by_id' and identify issues such as:
+      - Call never answered
+      - Call never ended
+      - No RTP packets
+      - Very short RTP stream
+    Return a textual summary of the calls and any identified issues.
+    """
+    if not calls_by_id:
+        return "No calls found in the capture."
+    summary_lines = []
+    for call_id, call_obj in calls_by_id.items():
+        # Basic call info
+        call_info = f"Call-ID: {call_id}"
+        call_info += f"\n  From-Tag: {call_obj.from_tag}"
+        call_info += f"\n  To-Tag: {call_obj.to_tag}"
+        # Times
+        invite_time = call_obj.invite_time
+        answer_time = call_obj.answer_time
+        end_time = call_obj.end_time
+        call_info += f"\n  Invite time: {invite_time}"
+        call_info += f"\n  Answer time: {answer_time}"
+        call_info += f"\n  End time: {end_time}"
+        # Gather all issues for this call
+        issues = spot_issues(call_obj)
+        # Media summary
+        media_info = []
+        for ssrc, media_stream in call_obj.media_streams.items():
+            media_info.append(
+                f"    SSRC: {ssrc}, Payload: {media_stream.payload_type}, Packets: {media_stream.packets}"
+            )
+        if media_info:
+            call_info += "\n  Media Streams:\n" + "\n".join(media_info)
+        else:
+            call_info += "\n  Media Streams: None"
+        # Combine call info and issues
+        if issues:
+            call_info += "\n  Issues:"
+            for i in issues:
+                call_info += f"\n    - {i}"
+        else:
+            call_info += "\n  No major issues identified."
+        summary_lines.append(call_info + "\n")
+    return "\n".join(summary_lines)
+def spot_issues(call_obj):
+    """
+    Given a Call object, return a list of textual issues found.
+    This is a naive example – adapt it as needed.
+    """
+    issues = []
+    # 1) Was there an INVITE but no 200 OK => never answered
+    if call_obj.invite_time and not call_obj.answer_time:
+        issues.append("Call was never answered (no 200 OK).")
+    # 2) Was there an answer but no BYE => never properly ended
+    #    (We assume end_time is set when a BYE occurs or 200 to BYE is seen.)
+    if call_obj.answer_time and not call_obj.end_time:
+        issues.append("Call was never ended (no BYE).")
+    # 3) Check if RTP packets exist
+    total_rtp_packets = sum(stream.packets for stream in call_obj.media_streams.values())
+    if total_rtp_packets == 0:
+        issues.append("No RTP packets observed.")
+    elif total_rtp_packets < 50:
+        # Arbitrary threshold just to highlight short calls or potential media problems
+        issues.append(f"Very few RTP packets ({total_rtp_packets}). May indicate a short or broken stream.")
+    # 4) Optional: check duration vs. packet count
+    #    If call was answered but we see extremely few packets, there's likely an issue
+    if call_obj.answer_time and call_obj.end_time:
+        call_duration = (call_obj.end_time - call_obj.answer_time).total_seconds()
+        if call_duration > 0:
+            rtp_rate = total_rtp_packets / call_duration
+            # E.g., if we consider typical G.711 at 50 packets/sec in each direction,
+            # and we see less than 10 pkts/sec => potential audio problem
+            if rtp_rate < 10:
+                issues.append(
+                    f"Low RTP packet rate ({rtp_rate:.1f} pkts/sec). Possible audio issue."
+                )
+    # Additional checks (jitter, packet loss, incomplete SDP, etc.) can be added here.
+    return issues

app.py ADDED Viewed

	@@ -0,0 +1,120 @@

+# app.py
+import os
+import gradio as gr
+# Import your modules
+from parsers import parse_pcap
+from analysis import analyze_calls
+from call_flow import create_call_flow_diagram
+from llm_utils import create_local_pipeline, get_llm_opinion
+# 1) Initialize the local Hugging Face pipeline (model).
+#    Change MODEL_ID to select a different model on HF.
+#    NOTE: this runs at import time, so importing app.py builds the pipeline
+#    before any UI exists.
+#    NOTE(review): llm_utils is not one of the five files listed in this
+#    commit - confirm the module is present in the Space.
+MODEL_ID = "tiiuae/falcon-7b-instruct"
+# Module-level pipeline object passed to get_llm_opinion() by ask_llm_opinion().
+generator = create_local_pipeline(MODEL_ID)
+def process_file(pcap_file):
+    """
+    This function is called when user clicks the 'Analyze File' button.
+    - pcap_file: The uploaded PCAP file object from Gradio.
+    Returns:
+    - a textual summary (analysis + call flow)
+    - the dictionary of calls_by_id (saved in Gradio state so we can pass them to the LLM)
+    """
+    if not pcap_file:
+        return "No file uploaded.", {}
+    # Save the uploaded file to a temporary path
+    temp_filename = pcap_file.name
+    with open(temp_filename, "wb") as f:
+        f.write(pcap_file.read())
+    # 1) Parse the PCAP
+    calls_by_id = parse_pcap(temp_filename)
+    # 2) Analyze the calls
+    analysis_result = analyze_calls(calls_by_id)
+    # 3) Create a call flow diagram (textual)
+    call_flow_text = create_call_flow_diagram(calls_by_id)
+    # Combine them into one display string
+    result_text = (
+        f"=== VoIP Analysis ===\n"
+        f"{analysis_result}\n\n"
+        f"=== Call Flow Diagram(s) ===\n"
+        f"{call_flow_text}"
+    )
+    # Delete the temp file if desired
+    os.remove(temp_filename)
+    return result_text, calls_by_id
+def ask_llm_opinion(calls_data, question):
+    """
+    This function passes the call analysis info + user question to the LLM (local pipeline).
+    - calls_data: The dictionary of calls returned from parse_pcap() [Gradio state]
+    - question: The user’s question in text form
+    """
+    if not calls_data:
+        return "No call data available. Please upload and analyze a PCAP first."
+    if not question.strip():
+        return "Please enter a question."
+    # You might want to create a summary of the calls or pass the entire calls_data.
+    # For brevity, let's just embed a brief mention that calls_data exist:
+    calls_context = "Below is a representation of the calls found in the PCAP:\n"
+    for call_id, call_obj in calls_data.items():
+        calls_context += f"- Call-ID: {call_id}, from_tag: {call_obj.from_tag}, to_tag: {call_obj.to_tag}\n"
+    prompt = (
+        f"{calls_context}\n"
+        f"User's question: {question}\n"
+        f"Please provide your expert VoIP analysis or advice."
+    )
+    # Query the local pipeline
+    llm_response = get_llm_opinion(prompt, generator=generator)
+    return llm_response
+def main():
+    """
+    Build the Gradio interface with two tabs:
+    1) PCAP Analysis
+    2) LLM Consultation
+    """
+    # Components created inside this block belong to the 'demo' app
+    with gr.Blocks() as demo:
+        gr.Markdown("# VoIP Analyzer\nUpload a PCAP/PCAPNG file for SIP/RTP analysis. Then consult an LLM for further insights.")
+        # We keep the calls data in a Gradio State so we can pass it between tabs
+        calls_state = gr.State({})
+        with gr.Tab("PCAP Analysis"):
+            file_input = gr.File(label="Upload a PCAP or PCAPNG file")
+            analyze_button = gr.Button("Analyze File")
+            analysis_output = gr.Textbox(label="Analysis & Call Flow", lines=20)
+            # process_file returns (text, calls dict): the text fills the
+            # textbox and the dict is stored in calls_state
+            analyze_button.click(
+                fn=process_file,
+                inputs=file_input,
+                outputs=[analysis_output, calls_state]
+            )
+        with gr.Tab("LLM Consultation"):
+            question_input = gr.Textbox(label="Ask a question about the call(s)")
+            ask_button = gr.Button("Ask LLM")
+            llm_output = gr.Textbox(label="LLM Response", lines=10)
+            # ask_llm_opinion receives the stored calls plus the typed question
+            ask_button.click(
+                fn=ask_llm_opinion,
+                inputs=[calls_state, question_input],
+                outputs=[llm_output]
+            )
+    # Serve on all network interfaces, port 7860
+    demo.launch(server_name="0.0.0.0", server_port=7860)
+# Start the app only when this file is run as a script, not when imported
+if __name__ == "__main__":
+    main()

call_flow.py ADDED Viewed

	@@ -0,0 +1,83 @@

+# call_flow.py
+def create_call_flow_diagram(calls_by_id):
+    """
+    Given a dictionary of Call objects (keyed by call_id), generate
+    a textual call flow for each call. We assume each Call has a
+    'sip_sequence' attribute: a list of dicts with:
+        {
+          'timestamp': datetime object,
+          'src_ip': str,
+          'dst_ip': str,
+          'message': str  # e.g. "INVITE (Call-ID: abc)"
+        }
+    The output will look like a classic ASCII SIP ladder diagram.
+    Returns a string that concatenates the diagrams for all calls.
+    """
+    if not calls_by_id:
+        return "No calls found to display."
+    # We'll combine diagrams for each call into one text output
+    output = []
+    for call_id, call_obj in calls_by_id.items():
+        # Retrieve the SIP sequence in chronological order (by timestamp)
+        # You must ensure your parser sets call_obj.sip_sequence sorted by time
+        sip_sequence = getattr(call_obj, 'sip_sequence', [])
+        sip_sequence = sorted(sip_sequence, key=lambda x: x['timestamp'])
+        # Get unique participants for a horizontal layout
+        # We'll just gather distinct IP addresses from the sequence
+        # and put them left (caller) -> right (callee).
+        # If you know which is the caller vs. callee, you can fix that order.
+        participants = sorted(
+            list({msg['src_ip'] for msg in sip_sequence} | {msg['dst_ip'] for msg in sip_sequence})
+        )
+        if len(participants) < 2:
+            # If we somehow only have one or zero participants, skip
+            output.append(f"Call-ID {call_id} only has one participant.\n")
+            continue
+        # We’ll just place the first IP as “Left” and last IP as “Right” for demonstration
+        left_participant = participants[0]
+        right_participant = participants[-1]
+        # Header for this call
+        diagram_lines = []
+        diagram_lines.append(f"Call Flow for Call-ID: {call_id}")
+        diagram_lines.append(f"   {left_participant:<30}                 {right_participant}")
+        diagram_lines.append("   -----------------------------------------------------------")
+        # Each SIP message in chronological order
+        for msg in sip_sequence:
+            src = msg['src_ip']
+            dst = msg['dst_ip']
+            message = msg['message']
+            if src == left_participant and dst == right_participant:
+                # Left -> Right
+                diagram_lines.append(f"   {message:<30}  ----------------->")
+            elif src == right_participant and dst == left_participant:
+                # Right -> Left
+                # We want the message on the right side, but in ASCII, we can do:
+                # Some spaces, then the message, then <----
+                # For demonstration, we do something simpler:
+                diagram_lines.append(f"                                 {message}")
+                diagram_lines.append("   <-------------------------------")
+            else:
+                # If the message is between some other IP pair, we can either
+                # skip or try to align it in the middle. For now, just note it.
+                # This might happen if there are multiple proxies or servers.
+                diagram_lines.append(f"   [{src} -> {dst}] {message}")
+        # Example: If we want to artificially place an "RTP ...." line
+        # you could do a simple check if the call has media streams:
+        if call_obj.media_streams:
+            diagram_lines.append("   RTP ...................... RTP ....................")
+        # Add a blank line after the call’s diagram
+        output.append("\n".join(diagram_lines) + "\n")
+    return "\n".join(output)

parsers.py ADDED Viewed

	@@ -0,0 +1,158 @@

+# parsers.py
+import pyshark
+from datetime import datetime
+class MediaStream:
+    def __init__(self, ssrc=None, payload_type=None, packets=0):
+        self.ssrc = ssrc
+        self.payload_type = payload_type
+        self.packets = packets
+    def __repr__(self):
+        return (f"<MediaStream ssrc={self.ssrc}, "
+                f"payload_type={self.payload_type}, "
+                f"packets={self.packets}>")
+class Call:
+    def __init__(self, call_id):
+        self.call_id = call_id
+        self.from_tag = None
+        self.to_tag = None
+        self.invite_time = None
+        self.answer_time = None
+        self.end_time = None
+        self.sip_messages = []     # store raw SIP info if needed
+        self.media_streams = {}    # key: SSRC, value: MediaStream
+        # NEW: a chronological record of SIP messages:
+        # each entry: { 'timestamp', 'src_ip', 'dst_ip', 'message' }
+        self.sip_sequence = []
+    def __repr__(self):
+        return (f"<Call call_id={self.call_id}, from_tag={self.from_tag}, "
+                f"to_tag={self.to_tag}, invite_time={self.invite_time}, "
+                f"answer_time={self.answer_time}, end_time={self.end_time}, "
+                f"media_streams={list(self.media_streams.values())}>")
+def parse_pcap(pcap_path):
+    """
+    Parse a pcap/pcapng file using PyShark.
+    Return a dictionary of calls keyed by Call-ID.
+    Each call holds relevant SIP and RTP info, including a sip_sequence
+    for generating call flows.
+    """
+    capture = pyshark.FileCapture(pcap_path, keep_packets=False)
+    calls_by_id = {}
+    for packet in capture:
+        # Convert sniff timestamp to a datetime object
+        pkt_ts = datetime.utcfromtimestamp(float(packet.sniff_timestamp))
+        # Attempt to extract src/dst IP (assuming IPv4)
+        src_ip = getattr(packet.ip, 'src', None) if hasattr(packet, 'ip') else None
+        dst_ip = getattr(packet.ip, 'dst', None) if hasattr(packet, 'ip') else None
+        # --- Check for SIP packets ---
+        if 'sip' in packet:
+            sip_layer = packet.sip
+            try:
+                call_id = sip_layer.call_id.strip()
+            except AttributeError:
+                # If we can't find a Call-ID, skip
+                continue
+            if call_id not in calls_by_id:
+                calls_by_id[call_id] = Call(call_id)
+            call_obj = calls_by_id[call_id]
+            # Capture raw SIP message if desired
+            # e.g. "INVITE sip:..." or "SIP/2.0 200 OK"
+            raw_msg = (sip_layer.get_field_value('Request-Line') or
+                       sip_layer.get_field_value('Status-Line') or
+                       "UNKNOWN SIP MESSAGE")
+            call_obj.sip_messages.append(raw_msg)
+            # Try to parse from-tag and to-tag
+            try:
+                call_obj.from_tag = sip_layer.from_tag
+            except AttributeError:
+                pass
+            try:
+                call_obj.to_tag = sip_layer.to_tag
+            except AttributeError:
+                pass
+            # Determine whether it's a request or a response
+            # and build a short summary like "INVITE (Call-ID: abc)" or "200 (Call-ID: abc)"
+            message_summary = None
+            # Check request method
+            try:
+                method = sip_layer.Request_Line_Method.lower()
+                if method:
+                    # e.g. "INVITE" (uppercase) plus the Call-ID
+                    message_summary = f"{method.upper()} (Call-ID: {call_id})"
+            except AttributeError:
+                # No method => might be a response
+                pass
+            # If it's a response, we look at the status line
+            if not message_summary:
+                status_line = sip_layer.get_field_value('Status-Line')
+                if status_line:
+                    parts = status_line.split(None, 2)
+                    if len(parts) >= 2 and parts[1].isdigit():
+                        # e.g. "SIP/2.0 200 OK"
+                        code = parts[1]
+                        message_summary = f"{code} (Call-ID: {call_id})"
+                    else:
+                        message_summary = f"UNKNOWN RESPONSE (Call-ID: {call_id})"
+                else:
+                    message_summary = f"UNKNOWN SIP MESSAGE (Call-ID: {call_id})"
+            # Store the short summary in our new sip_sequence
+            call_obj.sip_sequence.append({
+                'timestamp': pkt_ts,
+                'src_ip': src_ip,
+                'dst_ip': dst_ip,
+                'message': message_summary
+            })
+            # Record key timestamps
+            # If we detect an INVITE
+            if message_summary.startswith("INVITE"):
+                call_obj.invite_time = call_obj.invite_time or pkt_ts
+            # If we detect a 200, treat as call answered (naive approach)
+            if message_summary.startswith("200"):
+                if call_obj.answer_time is None:
+                    call_obj.answer_time = pkt_ts
+            # If we detect a BYE or a 200 to a BYE => call ended
+            if "BYE" in message_summary:
+                call_obj.end_time = pkt_ts
+            # If there is an SDP part, you can parse media lines here (not shown)
+            # ...
+        # --- Check for RTP packets ---
+        elif 'rtp' in packet:
+            rtp_layer = packet.rtp
+            ssrc = getattr(rtp_layer, 'ssrc', None)
+            payload_type = getattr(rtp_layer, 'payload_type', None)
+            # This is a simplistic approach, not tying SSRC directly to the call
+            # with matching IP/port from SDP. Instead, we store the RTP in all calls.
+            # In practice, you'd match the IP/port from SDP to associate the flow
+            # with the correct Call.
+            for c_obj in calls_by_id.values():
+                if ssrc not in c_obj.media_streams:
+                    c_obj.media_streams[ssrc] = MediaStream(ssrc=ssrc, payload_type=payload_type)
+                c_obj.media_streams[ssrc].packets += 1
+    capture.close()
+    return calls_by_id

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+# For SIP/RTP parsing (NOTE: parsers.py imports pyshark, which is not listed in this file):
+scapy>=2.4.5
+# For building the Gradio interface:
+gradio>=3.23.0
+# For making API calls to OpenAI or Hugging Face:
+requests>=2.28.1
+# (Optional) if you want to use Hugging Face transformers/models locally:
+transformers>=4.26.0
+huggingface_hub>=0.10.1