File size: 7,306 Bytes
00bd2b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
import re
import html
import uuid
import logging
from typing import List, Tuple, Optional

# Module-level logger named after this module; this line only obtains the
# logger object and does not itself set any handlers or levels.
logger = logging.getLogger(__name__)

def sanitize_input(text: str) -> str:
    """Sanitize user input to prevent potential injection attacks.

    The cleaning steps run in this order:

    1. Every ``<`` and ``>`` character is dropped, so no HTML tag can survive.
    2. Inline event-handler attributes (``onclick="..."``, ``onload='...'``,
       any ``on<name>=`` followed by a quoted value) are removed,
       case-insensitively.
    3. The result is cut to at most 2000 characters.

    Args:
        text: User input text

    Returns:
        Sanitized text, at most 2000 characters long. An empty string is
        returned (and an error is logged) when ``text`` is not a ``str``.
    """
    if not isinstance(text, str):
        # Keep the historical "bad input -> empty string" contract, but make
        # the condition explicit instead of catching every exception.
        logger.error(
            "Error sanitizing input: expected str, got %s",
            type(text).__name__,
        )
        return ""

    # Remove angle brackets while preserving the rest of the text.
    sanitized = re.sub(r'[<>]', '', text)

    # Remove JavaScript event handlers. The leading \b stops the pattern from
    # eating the tail of ordinary words (e.g. the "ont" in font="Arial"), and
    # both double- and single-quoted attribute values are recognised.
    sanitized = re.sub(
        r'''\bon\w+\s*=\s*(?:"[^"]*"|'[^']*')''',
        '',
        sanitized,
        flags=re.IGNORECASE,
    )

    # Limit length.
    return sanitized[:2000]

def strip_html(text: str) -> str:
    """Remove HTML tags from text while preserving content structure.

    Structural tags are turned into plain-text equivalents before the
    remaining markup is stripped:

    - ``<br>``, ``<br/>`` and ``<br />`` become a single newline.
    - ``</p>`` and ``</div>`` become a blank line.
    - ``<ul>``/``<ol>`` (opening or closing) become a newline.
    - ``<li>`` becomes a newline followed by ``"- "``.

    Tag names are matched case-insensitively and list tags may carry
    attributes. HTML entities (``&amp;`` etc.) are left as they are.

    Args:
        text: HTML content to be stripped

    Returns:
        Plain text with HTML tags removed, runs of blank lines collapsed to
        one, and leading/trailing whitespace trimmed. Empty or ``None`` input
        yields an empty string.
    """
    if not text:
        return ""

    # Replace line break tags with actual line breaks (all common spellings).
    text = re.sub(r'<br\s*/?>', '\n', text, flags=re.IGNORECASE)
    text = re.sub(r'</(?:p|div)\s*>', '\n\n', text, flags=re.IGNORECASE)

    # Replace list tags with appropriate formatting. \b keeps "<li" from
    # matching longer tag names such as <link>.
    text = re.sub(r'</?(?:ul|ol)\b[^>]*>', '\n', text, flags=re.IGNORECASE)
    text = re.sub(r'<li\b[^>]*>', '\n- ', text, flags=re.IGNORECASE)

    # Remove remaining HTML tags.
    clean_text = re.sub(r"<[^>]+>", "", text)

    # Collapse any whitespace-only gap between newlines to one blank line.
    clean_text = re.sub(r'\n\s*\n', '\n\n', clean_text)
    return clean_text.strip()

def inject_interactive_elements(html_str: str) -> str:
    """
    Add interactive elements to content containing fenced code blocks.

    Every Markdown-style fence of the form ```` ```lang\\n...``` ```` is
    replaced by a ``<div>`` holding a "Copy" button and a ``<pre><code>``
    element whose class is ``language-<lang>`` (``language-text`` when no
    language is given). The code itself is HTML-escaped.

    When at least one fence was replaced, the result is additionally wrapped
    with a highlight.js stylesheet and script (loaded from cdnjs) in front and
    an inline ``<script>`` defining ``copyCode`` and the highlight.js
    initialisation at the end. If no complete fence is found, the input is
    returned unchanged so that no unused assets are injected.

    NOTE: every successful call appends its own copy of the assets, so
    combining several outputs on one page repeats them.

    Args:
        html_str: HTML content with potential code blocks

    Returns:
        HTML content with interactive elements added, or ``html_str`` itself
        when it is empty, contains no complete fenced block, or the
        substitution fails (the failure is logged).
    """
    if not html_str or '```' not in html_str:
        return html_str

    # Build the replacement markup for one fenced block.
    def add_copy_button(match):
        code_content = match.group(2)
        code_lang = match.group(1) if match.group(1) else "text"
        # Short random id that ties the button to its onclick argument.
        button_id = str(uuid.uuid4())[:8]

        return f'''
        <div style="position: relative; margin: 10px 0;">
            <button id="copy-btn-{button_id}" onclick="copyCode('{button_id}')" 
                style="position: absolute; top: 5px; right: 5px; z-index: 10; 
                       background: #f0f0f0; border: 1px solid #ccc; border-radius: 4px; 
                       padding: 4px 8px; cursor: pointer; font-size: 12px;">
                Copy
            </button>
            <pre style="padding: 20px 10px 10px 10px; border-radius: 8px; 
                        background: #f8f8f8; overflow-x: auto; position: relative;">
                <code class="language-{code_lang}">{html.escape(code_content)}</code>
            </pre>
        </div>
        '''

    # Process code blocks with language specification. This is best-effort:
    # on any failure the untouched input is handed back.
    try:
        result, replaced = re.subn(
            r'```(\w*)\n(.*?)```',
            add_copy_button,
            html_str,
            flags=re.DOTALL,
        )
    except Exception as e:
        logger.error(f"Error adding interactive elements: {e}")
        return html_str

    # A stray "```" without a complete fence matches nothing; do not inject
    # the stylesheet and scripts in that case.
    if not replaced:
        return html_str

    # JavaScript for copy functionality and highlight.js initialisation.
    js_script = """
        <script>
        function copyCode(elementId) {
            const button = document.getElementById('copy-btn-' + elementId);
            const codeBlock = button.nextElementSibling.querySelector('code');
            const text = codeBlock.textContent;
            
            navigator.clipboard.writeText(text).then(() => {
                const originalText = button.textContent;
                button.textContent = 'Copied!';
                setTimeout(() => {
                    button.textContent = originalText;
                }, 2000);
            }).catch(err => {
                console.error('Failed to copy: ', err);
                button.textContent = 'Failed';
                setTimeout(() => {
                    button.textContent = 'Copy';
                }, 2000);
            });
        }
        
        // Initialize syntax highlighting
        document.addEventListener('DOMContentLoaded', (event) => {
            document.querySelectorAll('pre code').forEach((el) => {
                hljs.highlightElement(el);
            });
        });
        </script>
        """

    # Syntax highlighting assets (highlight.js 11.7.0 from cdnjs).
    css_link = '<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/styles/github.min.css">\n'
    hljs_script = '<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/highlight.min.js"></script>\n'

    return css_link + hljs_script + result + js_script

def detect_language_from_context(question: str, topic: str) -> str:
    """Guess which language or technology a query is about.

    Each known language has a list of indicator keywords. A language matches
    when any of its keywords occurs as a case-insensitive substring of the
    text being examined. The topic is examined first; the question is only
    consulted when the topic matches nothing. Languages are tried in the
    order they are listed, and the first match wins.

    Args:
        question: User's question text
        topic: Main topic of the query

    Returns:
        The lower-cased name of the first matching language (for example
        ``"python"``, ``"c#"`` or ``"power bi"``), or ``"text"`` when neither
        the topic nor the question contains any keyword.
    """
    # Language name -> substrings that hint at it (insertion order matters).
    keyword_table = {
        "Python": ["python", "pandas", "numpy", "matplotlib", "dataframe"],
        "SQL": ["sql", "query", "database", "select", "join"],
        "JavaScript": ["javascript", "js", "react", "dom", "node"],
        "Java": ["java", "spring", "hibernate"],
        "C#": ["c#", "csharp", "dotnet", ".net"],
        "Power BI": ["dax", "powerbi", "power bi", "pbix"],
        "Data Visualization": ["visualization", "chart", "plot", "graph"],
        "HTML": ["html", "markup", "webpage"],
        "CSS": ["css", "stylesheet"],
        "Shell": ["bash", "shell", "command", "script"]
    }

    def first_hit(haystack):
        # First language whose keyword list has a substring hit, else None.
        return next(
            (
                name.lower()
                for name, hints in keyword_table.items()
                if any(hint.lower() in haystack for hint in hints)
            ),
            None,
        )

    # The topic takes priority; the question is only touched as a fallback.
    found = first_hit(topic.lower())
    if found is None:
        found = first_hit(question.lower())

    return "text" if found is None else found

def truncate_text(text: str, max_length: int = 500, min_length: int = 200) -> str:
    """Shorten text to roughly ``max_length`` characters, ending in "...".

    Text that already fits is returned untouched. Otherwise the cut is made
    at the last space found between positions ``min_length`` and
    ``max_length``; if there is no such space, the text is cut at exactly
    ``max_length`` characters. The ellipsis is appended after the cut, so the
    returned string can be up to three characters longer than ``max_length``.

    Args:
        text: Text to truncate
        max_length: Length above which the text is truncated
        min_length: Earliest position at which a word break is looked for

    Returns:
        The original text when it is short enough, an empty string for empty
        or ``None`` input, otherwise the shortened text followed by "..."
    """
    if not text:
        return ""

    if len(text) > max_length:
        # Prefer the last space inside the [min_length, max_length) window.
        cut_at = text.rfind(' ', min_length, max_length)
        if cut_at <= 0:
            # No usable space: fall back to a hard cut.
            cut_at = max_length
        return text[:cut_at] + "..."

    return text