Spaces:

meet12341234
/

LogLens

Sleeping

App Files Files Community

MeetInCode committed on Jul 27, 2025

Commit

ff17e47

1 Parent(s): 815a1eb

Add application file

Browse files

Files changed (1) hide show

app.py +281 -0

app.py ADDED Viewed

	@@ -0,0 +1,281 @@

+import gradio as gr
+import json
+import torch
+from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, TFAutoModelForSeq2SeqLM
+# --- Model Loading ---
+# Summarization model (BART)
def load_summarizer():
    """Build the log-summarization pipeline.

    Loads the ``VidhuMathur/bart-log-summarization`` checkpoint through
    ``TFAutoModelForSeq2SeqLM`` together with its tokenizer and wraps both in
    a ``"summarization"`` pipeline.

    Returns:
        The pipeline object, created with ``device=0`` when
        ``torch.cuda.is_available()`` is true and ``device=-1`` otherwise.
    """
    checkpoint = "VidhuMathur/bart-log-summarization"
    seq2seq_model = TFAutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    seq2seq_tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    # Same device selection as before, spelled out as a statement.
    if torch.cuda.is_available():
        device_index = 0
    else:
        device_index = -1

    return pipeline(
        "summarization",
        model=seq2seq_model,
        tokenizer=seq2seq_tokenizer,
        device=device_index,
    )
+# Causal LM for analysis (Qwen)
def load_qwen():
    """Load the ``Qwen/Qwen3-0.6B`` causal language model and its tokenizer.

    The model is loaded in ``float16`` and moved to ``"cuda"`` when
    ``torch.cuda.is_available()`` is true; otherwise it stays in ``float32``
    on ``"cpu"``. If the tokenizer has no pad token, its EOS token is reused
    as the pad token.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    checkpoint = "Qwen/Qwen3-0.6B"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    # Probe CUDA once and derive both the dtype and the target device from it.
    has_cuda = torch.cuda.is_available()
    if has_cuda:
        weights_dtype, target_device = torch.float16, "cuda"
    else:
        weights_dtype, target_device = torch.float32, "cpu"

    model = AutoModelForCausalLM.from_pretrained(
        checkpoint,
        torch_dtype=weights_dtype,
    ).to(target_device)

    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    return model, tokenizer
+# --- Core Pipeline Functions ---
def extract_json_simple(text):
    """Return the first brace-balanced ``{...}`` span found in *text*.

    Scanning starts at the first ``{`` and stops at the ``}`` that closes it.
    Braces that appear inside double-quoted string literals (including after
    backslash-escaped quotes) are ignored, so a value such as ``"echo }"``
    no longer ends the object prematurely.

    Args:
        text: Arbitrary text that may contain a JSON object.

    Returns:
        The substring from the opening brace through its matching closing
        brace, or ``None`` when *text* has no ``{`` or the first object is
        never closed. The substring is not validated as JSON here.
    """
    start = text.find('{')
    if start == -1:
        return None

    depth = 0
    in_string = False   # currently inside a "..." literal
    escaped = False     # previous character was a backslash inside a literal
    for index, char in enumerate(text[start:], start):
        if in_string:
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_string = False
            # Anything else inside a string literal is payload, not structure.
            continue
        if char == '"':
            in_string = True
        elif char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return text[start:index + 1]
    # Ran out of text before the first object was closed.
    return None
def ensure_required_keys(analysis, summary):
    """Fill in and normalise the four keys the UI expects in *analysis*.

    For each of ``root_cause``, ``debugging_steps``, ``debug_commands`` and
    ``useful_links``:

    * a missing or falsy value is replaced by a generic default;
    * list-valued keys are coerced to a list of non-blank strings (a lone
      string or other scalar is wrapped in a list), because the caller joins
      them with ``'\\n'.join``;
    * a non-string ``root_cause`` is converted with ``str``.

    Args:
        analysis: Dict parsed from the model output. It is modified in place.
        summary: Log summary text; its first 100 characters are embedded in
            the default ``root_cause``.

    Returns:
        The same *analysis* dict, for convenience.
    """
    required_keys = {
        "root_cause": f"Issue identified from log analysis: {summary[:100]}...",
        "debugging_steps": [
            "Check system logs for error patterns",
            "Verify service status and configuration",
            "Test connectivity and permissions"
        ],
        "debug_commands": [
            "systemctl status service-name",
            "journalctl -u service-name -n 50",
            "netstat -tlnp | grep port"
        ],
        "useful_links": [
            "https://docs.system-docs.com/troubleshooting",
            "https://stackoverflow.com/questions/tagged/debugging"
        ]
    }
    for key, default_value in required_keys.items():
        value = analysis.get(key)
        if not value:
            # Missing, None, "", [] ... all fall back to the default.
            analysis[key] = default_value
        elif isinstance(default_value, list):
            if isinstance(value, (list, tuple)):
                items = [str(item) for item in value if item is not None]
            else:
                # The model sometimes returns one string instead of a list.
                items = [str(value)]
            items = [item for item in items if item.strip()]
            analysis[key] = items or default_value
        elif not isinstance(value, str):
            analysis[key] = str(value)
    return analysis
def create_fallback_analysis(summary):
    """Produce a canned analysis chosen by keywords found in *summary*.

    The lower-cased summary is checked, in this order, for substrings that
    hint at a database, memory or disk problem; the first category that
    matches wins. When nothing matches, a generic analysis embedding the
    first 100 characters of *summary* is produced.

    Args:
        summary: Log summary text.

    Returns:
        A new dict with the keys ``root_cause``, ``debugging_steps``,
        ``debug_commands`` and ``useful_links``.
    """
    haystack = summary.lower()

    def mentions(*keywords):
        # Plain substring test, identical to the original keyword check.
        return any(keyword in haystack for keyword in keywords)

    if mentions('database', 'connection', 'sql'):
        cause = "Database connection issue detected in the logs"
        steps = [
            "Check if database service is running",
            "Verify database connection parameters",
            "Test network connectivity to database server",
            "Check database user permissions",
        ]
        commands = [
            "sudo systemctl status postgresql",
            "netstat -an | grep 5432",
            "psql -U username -h host -d database",
            "ping database-host",
        ]
        links = [
            "https://www.postgresql.org/docs/current/runtime.html",
            "https://dev.mysql.com/doc/refman/8.0/en/troubleshooting.html",
        ]
    elif mentions('memory', 'heap', 'oom'):
        cause = "Memory exhaustion or memory leak detected"
        steps = [
            "Monitor current memory usage",
            "Check for memory leaks in application",
            "Review JVM heap settings if Java application",
            "Analyze memory dump if available",
        ]
        commands = [
            "free -h",
            "top -o %MEM",
            "jstat -gc PID",
            "ps aux --sort=-%mem | head",
        ]
        links = [
            "https://docs.oracle.com/javase/8/docs/technotes/guides/troubleshoot/memleaks.html",
            "https://linux.die.net/man/1/free",
        ]
    elif mentions('disk', 'space', 'full'):
        cause = "Disk space exhaustion causing system issues"
        steps = [
            "Check disk usage across all filesystems",
            "Identify largest files and directories",
            "Clean up temporary files and logs",
            "Check for deleted files held by processes",
        ]
        commands = [
            "df -h",
            "du -sh /* | sort -hr",
            "find /var/log -type f -size +100M",
            "lsof +L1",
        ]
        links = [
            "https://linux.die.net/man/1/df",
            "https://www.cyberciti.biz/faq/linux-check-disk-space-command/",
        ]
    else:
        cause = f"System issue detected: {summary[:100]}..."
        steps = [
            "Review complete error logs",
            "Check system resource usage",
            "Verify service configurations",
            "Test system connectivity",
        ]
        commands = [
            "systemctl --failed",
            "journalctl -p err -n 50",
            "htop",
            "netstat -tlnp",
        ]
        links = [
            "https://linux.die.net/man/1/systemctl",
            "https://www.freedesktop.org/software/systemd/man/journalctl.html",
        ]

    return {
        "root_cause": cause,
        "debugging_steps": steps,
        "debug_commands": commands,
        "useful_links": links,
    }
def log_processing_pipeline(raw_log, summarizer, model, tokenizer):
    """Summarise a raw log and derive a structured troubleshooting analysis.

    Step 1 runs *summarizer* on *raw_log*. Step 2 prompts the causal LM for a
    JSON object, retrying up to two times; if no usable JSON is obtained, a
    keyword-based fallback analysis is used instead.

    Args:
        raw_log: Log text to analyse.
        summarizer: Callable used as ``summarizer(text, **kwargs)`` whose
            result is indexed as ``result[0]['summary_text']``.
        model: Causal LM exposing ``parameters()`` and ``generate()``.
        tokenizer: Tokenizer matching *model*.

    Returns:
        A dict with the keys ``raw_log``, ``summary``, ``analysis``,
        ``success`` and ``errors``. When summarisation itself fails,
        ``success`` is ``False`` and ``analysis`` stays ``None``; otherwise
        ``success`` is ``True`` (possibly via the fallback, which is noted in
        ``errors``).
    """
    results = {
        'raw_log': raw_log,
        'summary': None,
        'analysis': None,
        'success': False,
        'errors': []
    }
    # Step 1: Summarization
    try:
        summary_result = summarizer(
            raw_log,
            max_length=350,
            min_length=40,
            do_sample=False,
            # Callers pass logs of up to 100k characters; ask the pipeline to
            # truncate over-long input instead of failing on it.
            truncation=True,
        )
        summary_text = summary_result[0]['summary_text']
        results['summary'] = summary_text
    except Exception as e:
        results['errors'].append(f"Summarization failed: {e}")
        return results

    # Step 2: Analysis
    # The prompt does not change between attempts, so build it once.
    prompt = f"""Analyze this log summary and respond with ONLY a JSON object:\n\nLog: {summary_text}\n\nRequired JSON format:\n{{\n  \"root_cause\": \"explain the main problem\",\n  \"debugging_steps\": [\"step 1\", \"step 2\", \"step 3\"],\n  \"debug_commands\": [\"command1\", \"command2\", \"command3\"],\n  \"useful_links\": [\"link1\", \"link2\"]\n}}\n\nJSON:"""
    max_attempts = 2
    for attempts in range(1, max_attempts + 1):
        # Only the final attempt's failure is recorded; earlier ones are
        # simply retried (sampling is enabled, so output can differ).
        is_last_attempt = attempts == max_attempts
        try:
            inputs = tokenizer(prompt, return_tensors="pt", max_length=800, truncation=True)
            device = next(model.parameters()).device
            inputs = {k: v.to(device) for k, v in inputs.items()}
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=300,
                    temperature=0.2,
                    do_sample=True,
                    pad_token_id=tokenizer.eos_token_id,
                    eos_token_id=tokenizer.eos_token_id,
                    repetition_penalty=1.1
                )
            # Decode only the newly generated tokens, not the echoed prompt.
            prompt_length = inputs['input_ids'].shape[1]
            response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
            json_str = extract_json_simple(response)
            if json_str:
                try:
                    parsed = json.loads(json_str)
                except json.JSONDecodeError:
                    if is_last_attempt:
                        results['errors'].append(f"JSON parsing failed after {attempts} attempts")
                else:
                    results['analysis'] = ensure_required_keys(parsed, summary_text)
                    results['success'] = True
                    break
            elif is_last_attempt:
                results['errors'].append("No valid JSON found in response")
        except Exception as e:
            if is_last_attempt:
                results['errors'].append(f"Generation failed: {e}")

    if not results['success']:
        results['analysis'] = create_fallback_analysis(summary_text)
        results['success'] = True
        results['errors'].append("Used fallback analysis due to model issues")
    return results
+# --- Gradio Interface ---
def process_log_file(file_obj, summarizer, model, tokenizer):
    """Gradio handler: read an uploaded log file and run the analysis pipeline.

    Args:
        file_obj: The uploaded file. Either a filesystem path string (what
            ``gr.File(type="filepath")`` supplies) or an object exposing the
            path as ``.name``; ``None`` when nothing was uploaded.
        summarizer: Summarisation callable forwarded to the pipeline.
        model: Causal LM forwarded to the pipeline.
        tokenizer: Tokenizer forwarded to the pipeline.

    Returns:
        Always a 7-tuple matching the UI outputs: ``(status, summary,
        root_cause, debugging_steps, debug_commands, useful_links,
        full_json)``. On any failure only ``status`` is non-empty.
    """
    max_chars = 100000

    def failure(status):
        # One value per wired output component; returning fewer makes the
        # UI reject the result.
        return (status, "", "", "", "", "", "")

    def as_lines(value):
        # Tolerate a lone string or non-string items coming from the model.
        if isinstance(value, str):
            return value
        return '\n'.join(str(item) for item in value)

    if file_obj is None:
        return failure("No file uploaded")
    try:
        # A plain path string has no ``.name``; fall back to the value itself.
        path = getattr(file_obj, "name", file_obj)

        # ``latin-1`` decodes every byte sequence, so it must come last or
        # the encodings listed after it could never be tried.
        encodings = ('utf-8', 'cp1252', 'latin-1')
        log_content = None
        for encoding in encodings:
            try:
                with open(path, 'r', encoding=encoding) as f:
                    # Read one character past the limit so truncation can be
                    # detected without loading an arbitrarily large file.
                    log_content = f.read(max_chars + 1)
                break
            except UnicodeDecodeError:
                continue
        if log_content is None:
            return failure("Encoding error")
        if not log_content.strip():
            return failure("Empty file")
        if len(log_content) > max_chars:
            log_content = log_content[:max_chars] + "\n... (file truncated)"

        results = log_processing_pipeline(log_content, summarizer, model, tokenizer)
        if not results['success']:
            # Surface the recorded reasons instead of a bare failure notice.
            detail = "; ".join(str(err) for err in results.get('errors') or [])
            return failure(f"Analysis failed: {detail}" if detail else "Analysis failed")

        analysis = results['analysis']
        return (
            "Analysis complete",
            results['summary'],
            analysis.get('root_cause', ''),
            as_lines(analysis.get('debugging_steps', [])),
            as_lines(analysis.get('debug_commands', [])),
            as_lines(analysis.get('useful_links', [])),
            json.dumps(results, indent=2)
        )
    except Exception as e:
        # Top-level UI boundary: report the problem in the status box.
        return failure(f"Processing error: {str(e)}")
def main():
    """Load both models, assemble the Gradio interface and launch it."""
    summarizer = load_summarizer()
    model, tokenizer = load_qwen()

    with gr.Blocks(title="Minimal LogLens") as app:
        gr.Markdown("# Minimal LogLens Log Analyzer")
        file_input = gr.File(
            label="Upload Log File",
            file_types=[".txt", ".log", ".out", ".err"],
            type="filepath",
        )
        analyze_btn = gr.Button("Analyze Log")

        # Read-only result widgets, created in display order.
        status = gr.Textbox(label="Status", interactive=False)
        text_fields = [
            gr.Textbox(label=caption, lines=height, interactive=False)
            for caption, height in (
                ("Summary", 3),
                ("Root Cause", 2),
                ("Debugging Steps", 4),
                ("Debug Commands", 4),
                ("Useful Links", 2),
            )
        ]
        json_output = gr.Code(label="Full JSON Output", language="json", interactive=False)

        # The handler closes over the already-loaded models so the button
        # only has to supply the uploaded file.
        analyze_btn.click(
            fn=lambda f: process_log_file(f, summarizer, model, tokenizer),
            inputs=file_input,
            outputs=[status, *text_fields, json_output],
        )
    app.launch()


if __name__ == "__main__":
    main()