File size: 12,179 Bytes
ada3e2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
"""Knowledge Base for detecting and learning unknown concepts"""

import json
import os
import re
from typing import List, Dict, Set, Optional, Tuple
from datetime import datetime

from config import KNOWLEDGE_PATH, DATA_DIR

class KnowledgeBase:
    """Manages known and unknown programming concepts.

    State held on each instance:

    * ``known_concepts``   -- concept name -> metadata dict (``type``,
      ``category``, optional ``description``, ``learned_at``).
    * ``unknown_concepts`` -- concept name -> tracking dict (``first_seen``,
      ``times_seen``, ``contexts``, ``status``).
    * ``concept_examples`` -- concept name -> list of example code strings.
    * ``learning_queue``   -- list of ``{'concept', 'context', 'timestamp'}``
      dicts, one per concept waiting to be taught.

    The state is persisted as JSON at ``KNOWLEDGE_PATH``; ``add_unknown`` and
    ``teach_concept`` write it back after every change.
    """

    # Words that check_knowledge() drops before classifying anything.
    _STOP_WORDS = frozenset({'a', 'an', 'the', 'is', 'it', 'to', 'of'})

    # Everyday variable names that must never be reported as a library.
    _COMMON_VARS = frozenset({
        'self', 'cls', 'args', 'kwargs', 'result', 'data', 'value',
        'item', 'items', 'key', 'keys', 'val', 'vals', 'obj', 'func',
        'arr', 'lst', 'num', 'count', 'index', 'idx', 'temp', 'tmp',
        'i', 'j', 'k', 'n', 'm', 'x', 'y', 'z', 'a', 'b', 'c',
    })

    # Compiled once; extract_concepts() runs for every checked text.
    _IDENTIFIER_RE = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*')
    _IMPORT_RE = re.compile(r'(?:from|import)\s+([a-zA-Z_][a-zA-Z0-9_]*)')
    _DEFINITION_RE = re.compile(r'(?:def|class)\s+([a-zA-Z_][a-zA-Z0-9_]*)')

    def __init__(self):
        """Seed the built-in concepts, then merge whatever was saved on disk."""
        self.known_concepts: Dict[str, dict] = {}
        self.unknown_concepts: Dict[str, dict] = {}
        self.concept_examples: Dict[str, List[str]] = {}
        self.learning_queue: List[dict] = []

        # Built-in Python knowledge
        self._init_builtin_knowledge()

        # Load saved knowledge
        self.load()

    def _init_builtin_knowledge(self) -> None:
        """Register keywords, built-in functions, stdlib modules and
        well-known third-party libraries in ``known_concepts``.

        Every entry is stamped ``learned_at='builtin'``.
        """

        # Python keywords
        python_keywords = [
            'def', 'class', 'if', 'else', 'elif', 'for', 'while', 'try',
            'except', 'finally', 'with', 'as', 'import', 'from', 'return',
            'yield', 'raise', 'pass', 'break', 'continue', 'lambda', 'and',
            'or', 'not', 'in', 'is', 'None', 'True', 'False', 'global',
            'nonlocal', 'assert', 'del', 'async', 'await'
        ]

        # Python built-in functions
        builtin_functions = [
            'print', 'len', 'range', 'int', 'str', 'float', 'list', 'dict',
            'set', 'tuple', 'bool', 'type', 'isinstance', 'issubclass',
            'hasattr', 'getattr', 'setattr', 'delattr', 'callable', 'iter',
            'next', 'enumerate', 'zip', 'map', 'filter', 'sorted', 'reversed',
            'sum', 'min', 'max', 'abs', 'round', 'pow', 'divmod', 'input',
            'open', 'file', 'repr', 'hash', 'id', 'dir', 'vars', 'locals',
            'globals', 'eval', 'exec', 'compile', 'format', 'chr', 'ord',
            'bin', 'hex', 'oct', 'slice', 'object', 'super', 'property',
            'staticmethod', 'classmethod', 'all', 'any'
        ]

        # Python standard library modules
        stdlib_modules = [
            'os', 'sys', 'json', 're', 'math', 'random', 'datetime', 'time',
            'collections', 'itertools', 'functools', 'operator', 'string',
            'io', 'pathlib', 'shutil', 'glob', 'pickle', 'csv', 'sqlite3',
            'urllib', 'http', 'email', 'html', 'xml', 'logging', 'unittest',
            'threading', 'multiprocessing', 'subprocess', 'socket', 'asyncio',
            'typing', 'dataclasses', 'abc', 'copy', 'pprint', 'textwrap',
            'struct', 'codecs', 'unicodedata', 'hashlib', 'hmac', 'secrets'
        ]

        # Common third-party libraries
        common_libraries = {
            'numpy': 'Numerical computing library',
            'pandas': 'Data analysis library',
            'tensorflow': 'Machine learning framework',
            'pytorch': 'Deep learning framework',
            'torch': 'PyTorch deep learning',
            'keras': 'High-level neural network API',
            'sklearn': 'Machine learning library',
            'scikit-learn': 'Machine learning library',
            'matplotlib': 'Plotting library',
            'seaborn': 'Statistical visualization',
            'requests': 'HTTP library',
            'flask': 'Web framework',
            'django': 'Web framework',
            'fastapi': 'Modern web framework',
            'sqlalchemy': 'Database ORM',
            'beautifulsoup': 'Web scraping',
            'selenium': 'Browser automation',
            'pytest': 'Testing framework',
            'pillow': 'Image processing',
            'opencv': 'Computer vision',
            'cv2': 'OpenCV library',
            'scipy': 'Scientific computing',
            'nltk': 'Natural language toolkit',
            'spacy': 'NLP library',
            'transformers': 'Hugging Face transformers',
            'gradio': 'ML demo interface',
            'streamlit': 'Data app framework'
        }

        # Add to known concepts (description-less groups share one shape)
        plain_groups = (
            (python_keywords, 'keyword', 'python_builtin'),
            (builtin_functions, 'function', 'python_builtin'),
            (stdlib_modules, 'module', 'python_stdlib'),
        )
        for names, concept_type, category in plain_groups:
            for name in names:
                self.known_concepts[name] = {
                    'type': concept_type,
                    'category': category,
                    'learned_at': 'builtin'
                }

        for lib, desc in common_libraries.items():
            self.known_concepts[lib] = {
                'type': 'library',
                'category': 'third_party',
                'description': desc,
                'learned_at': 'builtin'
            }

    def save(self) -> None:
        """Save knowledge base to ``KNOWLEDGE_PATH`` as indented JSON.

        The parent directory is created when missing. The data is written to
        a sibling ``.tmp`` file and then moved over the target with
        ``os.replace`` so an interrupted write cannot leave a truncated file
        behind.

        Raises:
            OSError: if the directory or file cannot be written.
        """
        data = {
            'known_concepts': self.known_concepts,
            'unknown_concepts': self.unknown_concepts,
            'concept_examples': self.concept_examples,
            'learning_queue': self.learning_queue
        }

        directory = os.path.dirname(KNOWLEDGE_PATH)
        if directory:
            os.makedirs(directory, exist_ok=True)

        tmp_path = f"{KNOWLEDGE_PATH}.tmp"
        with open(tmp_path, 'w', encoding='utf-8') as f:
            # default=str keeps the dump from failing on non-JSON values.
            json.dump(data, f, indent=2, default=str)
        os.replace(tmp_path, KNOWLEDGE_PATH)

    @staticmethod
    def _typed_entries(section, entry_type) -> dict:
        """Return the entries of dict *section* whose values are *entry_type*.

        Anything that is not a dict yields an empty dict.
        """
        if not isinstance(section, dict):
            return {}
        return {
            name: value for name, value in section.items()
            if isinstance(value, entry_type)
        }

    def load(self) -> None:
        """Load knowledge base from ``KNOWLEDGE_PATH`` if the file exists.

        Saved known concepts are merged in without overwriting built-in
        entries; unknown concepts, examples and the learning queue are
        replaced by the saved values. Loading is best-effort: a file that
        cannot be read or parsed is reported on stdout and ignored, and
        sections or entries of the wrong JSON type are dropped so that the
        other methods can rely on the documented shapes.
        """
        if not os.path.exists(KNOWLEDGE_PATH):
            return

        try:
            with open(KNOWLEDGE_PATH, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers JSONDecodeError and UnicodeDecodeError.
            print(f"Error loading knowledge base: {e}")
            return

        if not isinstance(data, dict):
            print(
                "Error loading knowledge base: expected a JSON object, "
                f"got {type(data).__name__}"
            )
            return

        # Merge with built-in (don't overwrite)
        saved_known = self._typed_entries(data.get('known_concepts'), dict)
        for name, info in saved_known.items():
            self.known_concepts.setdefault(name, info)

        self.unknown_concepts = self._typed_entries(
            data.get('unknown_concepts'), dict
        )
        self.concept_examples = self._typed_entries(
            data.get('concept_examples'), list
        )

        saved_queue = data.get('learning_queue')
        if isinstance(saved_queue, list):
            self.learning_queue = [
                entry for entry in saved_queue if isinstance(entry, dict)
            ]
        else:
            self.learning_queue = []

    def extract_concepts(self, code: str) -> Set[str]:
        """Extract programming concepts from code.

        Returns the set of every identifier-shaped token in *code*. The
        import and definition patterns only ever match tokens the identifier
        pattern already found; they are kept to make the intent explicit.
        """
        concepts = set(self._IDENTIFIER_RE.findall(code))
        concepts.update(self._IMPORT_RE.findall(code))
        concepts.update(self._DEFINITION_RE.findall(code))
        return concepts

    def check_knowledge(self, text: str) -> Tuple[List[str], List[str]]:
        """Check text for known and unknown concepts.

        Args:
            text: source code or free text to scan.

        Returns:
            ``(known, unknown)``. ``known`` lists tokens found in
            ``known_concepts`` (exact or lower-cased match); ``unknown``
            lists the remaining tokens that ``_looks_like_library`` accepts.
            Both lists are in sorted order so the result is reproducible.
        """
        known: List[str] = []
        unknown: List[str] = []

        # Sorted: set iteration order for strings differs between runs.
        for concept in sorted(self.extract_concepts(text)):
            lowered = concept.lower()

            # Skip very short or common words
            if len(concept) < 2 or lowered in self._STOP_WORDS:
                continue

            if lowered in self.known_concepts or concept in self.known_concepts:
                known.append(concept)
            elif self._looks_like_library(concept):
                unknown.append(concept)

        return known, unknown

    def _looks_like_library(self, name: str) -> bool:
        """Check if a name looks like a library/module name.

        True only for all-lowercase names longer than three characters that
        are not common variable names and do not start with an underscore.
        """
        # Skip common variable names
        if name.lower() in self._COMMON_VARS:
            return False

        # Skip ALL_CAPS (likely constants)
        if name.isupper():
            return False

        # Skip _private and __dunder__
        if name.startswith('_'):
            return False

        return name.islower() and len(name) > 3

    def add_unknown(self, concept: str, context: str = "") -> None:
        """Record a sighting of an unknown concept and persist the change.

        A concept seen for the first time gets a tracking entry and a
        learning-queue item. A repeat sighting increments ``times_seen`` and
        appends *context* when it is non-empty and not already recorded.

        Args:
            concept: name of the unknown concept.
            context: optional snippet showing where it was seen.
        """
        entry = self.unknown_concepts.get(concept)
        if entry is None:
            self.unknown_concepts[concept] = {
                'first_seen': datetime.now().isoformat(),
                'times_seen': 1,
                'contexts': [context] if context else [],
                'status': 'pending'
            }

            # Add to learning queue
            self.learning_queue.append({
                'concept': concept,
                'context': context,
                'timestamp': datetime.now().isoformat()
            })
        else:
            # Entries loaded from disk may lack these fields.
            entry['times_seen'] = entry.get('times_seen', 0) + 1
            contexts = entry.setdefault('contexts', [])
            if context and context not in contexts:
                contexts.append(context)

        self.save()

    def teach_concept(
        self,
        concept: str,
        concept_type: str,
        description: str,
        example_code: str,
        category: str = "user_taught"
    ) -> bool:
        """Teach the model a new concept and persist the change.

        The concept is stored in ``known_concepts`` (replacing any existing
        entry of the same name), *example_code* is appended to its examples,
        and it is removed from ``unknown_concepts`` and the learning queue.

        Returns:
            Always ``True``.
        """

        # Add to known concepts
        self.known_concepts[concept] = {
            'type': concept_type,
            'category': category,
            'description': description,
            'learned_at': datetime.now().isoformat()
        }

        # Add example
        self.concept_examples.setdefault(concept, []).append(example_code)

        # Remove from unknown
        self.unknown_concepts.pop(concept, None)

        # Remove from learning queue (.get: tolerate items without the key)
        self.learning_queue = [
            item for item in self.learning_queue
            if item.get('concept') != concept
        ]

        self.save()

        return True

    def get_learning_queue(self) -> List[dict]:
        """Get concepts waiting to be learned (the internal list, not a copy)."""
        return self.learning_queue

    def get_unknown_concepts(self) -> Dict[str, dict]:
        """Get all unknown concepts (the internal dict, not a copy)."""
        return self.unknown_concepts

    def get_example_code(self, concept: str) -> List[str]:
        """Get example code for a concept; empty list if there is none."""
        return self.concept_examples.get(concept, [])

    def get_all_examples(self) -> str:
        """Get all example code for training.

        Each example is prefixed with a ``# Example of <concept>`` line and
        the examples are separated by one blank line.
        """
        return "\n\n".join(
            f"# Example of {concept}\n{example}"
            for concept, examples in self.concept_examples.items()
            for example in examples
        )

    def get_statistics(self) -> dict:
        """Get knowledge base statistics as a dict of counts."""
        return {
            'known_concepts': len(self.known_concepts),
            'unknown_concepts': len(self.unknown_concepts),
            'concepts_with_examples': len(self.concept_examples),
            'total_examples': sum(len(v) for v in self.concept_examples.values()),
            'learning_queue_size': len(self.learning_queue),
            'categories': self._count_categories()
        }

    def _count_categories(self) -> dict:
        """Count known concepts by category ('unknown' when none is set)."""
        categories: Dict[str, int] = {}
        for info in self.known_concepts.values():
            cat = info.get('category', 'unknown')
            categories[cat] = categories.get(cat, 0) + 1
        return categories


# Global knowledge base instance, created when this module is imported.
# Constructing it registers the built-in concepts and then calls load(),
# which reads KNOWLEDGE_PATH if that file exists.
knowledge_base = KnowledgeBase()