File size: 5,455 Bytes
068bc7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/env python3
"""
Quick fix - loads model directly without HuggingFace cache
Run this instead of enhanced_chat.py
"""

import sys
import torch
from pathlib import Path
from safetensors.torch import load_file as load_safetensors
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
import json

# Add src to path
sys.path.insert(0, str(Path(__file__).parent / "src"))

# Import enhancements
from enhancements import get_config, EnhancementConfig
from enhancements.nlp import IntentDetector, EntityRecognizer
from enhancements.knowledge_graph import KnowledgeGraph, RAGEngine
from enhancements.emotional_intelligence import SentimentAnalyzer, EmpathyEngine
from enhancements.collaboration import ConversationStateManager, MCPIntegration
from enhancements.learning import FeedbackCollector, PerformanceMonitor


class Stack2_9Enhanced:
    """Interactive Stack 2.9 chat assistant that loads its model from local files.

    The model weights are read straight from ``model.safetensors`` inside
    ``model_path`` and loaded into a model built from the local config.
    The enhancement modules (intent, sentiment, RAG, conversation state,
    monitoring, ...) are instantiated eagerly in ``__init__``; the language
    model itself is loaded lazily on the first call to ``load_model``.
    """

    def __init__(self, model_path: str = "/Users/walidsobhi/stack-2-9-final-model") -> None:
        """Store the model location and build the enhancement modules.

        Args:
            model_path: Directory holding the config, tokenizer files and
                ``model.safetensors``.
        """
        self.model_path = model_path
        self._model = None
        self._tokenizer = None
        self._init_modules()

    def _init_modules(self) -> None:
        """Instantiate every enhancement module and seed the RAG store."""
        print("Loading enhancement modules...")
        # The returned config is not used here; the call is kept in case
        # get_config() has load-time side effects (TODO confirm).
        get_config()

        self.intent_detector = IntentDetector()
        self.entity_recognizer = EntityRecognizer()
        self.knowledge_graph = KnowledgeGraph()
        self.rag_engine = RAGEngine()
        self.sentiment_analyzer = SentimentAnalyzer()
        self.empathy_engine = EmpathyEngine()
        self.conversation_manager = ConversationStateManager()
        self.mcp = MCPIntegration()
        self.feedback_collector = FeedbackCollector()
        self.performance_monitor = PerformanceMonitor()

        # Seed RAG
        self.rag_engine.add_document("intro", "Stack 2.9 is an AI coding assistant")
        self.rag_engine.add_document("commands", "Commands: search:<query>, quit, feedback")

        print("✓ All modules loaded!\n")

    def load_model(self) -> None:
        """Load tokenizer and model from ``self.model_path`` if not loaded yet.

        Does nothing when a model is already cached. The tokenizer and model
        are built in local variables and only published on ``self`` once
        every step has succeeded, so a failed load never leaves a model with
        random weights cached for the next call.

        Raises:
            Whatever the underlying loaders raise (missing files, mismatched
            state-dict keys, ...); nothing is caught here.
        """
        if self._model is not None:
            return

        print(f"\nLoading model from {self.model_path} (direct load - no download)...")

        model_dir = Path(self.model_path)

        config = AutoConfig.from_pretrained(model_dir)
        tokenizer = AutoTokenizer.from_pretrained(model_dir)

        # Read the weights file ourselves instead of going through
        # from_pretrained for the model.
        print("Loading model.safetensors directly...")
        weights = load_safetensors(str(model_dir / "model.safetensors"))

        model = AutoModelForCausalLM.from_config(config)
        model.load_state_dict(weights)
        model = model.to(torch.float16)
        # from_config() returns a freshly constructed module, which starts in
        # training mode like any torch.nn.Module; switch to eval mode so
        # dropout is disabled while generating.
        model.eval()

        on_gpu = torch.cuda.is_available()
        if on_gpu:
            model.to("cuda")

        self._tokenizer = tokenizer
        self._model = model
        print("✓ Model loaded to GPU!\n" if on_gpu else "✓ Model loaded to CPU!\n")

    @staticmethod
    def _trim_reply(text: str) -> str:
        """Return ``text`` cut at the first line starting with ``User:``, stripped.

        The model may keep writing further dialogue turns after its answer;
        only the part before the next ``User:`` turn is the actual reply.
        """
        # Prefix a newline so a marker at the very start of the text is
        # recognised as well.
        reply, _, _ = ("\n" + text).partition("\nUser:")
        return reply.strip()

    def _build_prompt(self, user_input: str) -> str:
        """Run the analysis modules on ``user_input`` and assemble the prompt."""
        # The detected intent is not used further in this script; the call is
        # kept so the detector still sees every message.
        self.intent_detector.detect_intent(user_input)
        sentiment = self.sentiment_analyzer.analyze_sentiment(user_input)
        rag_context = self.rag_engine.retrieve_as_context(user_input, 300)

        system = "You are Stack 2.9, an expert AI coding assistant."
        if rag_context:
            system += f"\nContext: {rag_context}"
        if sentiment['sentiment'] == 'negative':
            system += "\nBe empathetic."

        return f"{system}\n\nUser: {user_input}\nAssistant:"

    def _generate_reply(self, full_prompt: str) -> str:
        """Generate the assistant's reply for an already assembled prompt."""
        self.load_model()

        inputs = self._tokenizer(full_prompt, return_tensors='pt')
        if torch.cuda.is_available():
            inputs = inputs.to("cuda")

        outputs = self._model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.4,
            do_sample=True,
            pad_token_id=self._tokenizer.eos_token_id
        )

        # A causal LM returns prompt + continuation. Decode only the newly
        # generated tokens so that "Assistant:" appearing in the user input
        # or the retrieved context cannot corrupt the extracted reply.
        prompt_length = inputs["input_ids"].shape[-1]
        generated = outputs[0][prompt_length:]
        text = self._tokenizer.decode(generated, skip_special_tokens=True)
        return self._trim_reply(text)

    def chat(self) -> None:
        """Run the interactive read-reply loop on stdin/stdout.

        The loop ends on ``quit``/``exit``/``q``, on Ctrl-C, or when stdin is
        closed (Ctrl-D / end of piped input). Each exchange is recorded in
        the conversation manager and counted by the performance monitor.
        """
        print("=" * 50)
        print("Stack 2.9 Enhanced (Direct Load)")
        print("=" * 50)
        # NOTE(review): "search:<query>" and "feedback" are advertised here
        # but get no special handling below; they are sent to the model like
        # any other input.
        print("\nCommands: search:<query>, feedback, quit\n")

        self.conversation_manager.create_session()
        self.performance_monitor.increment_session_count()

        while True:
            try:
                user_input = input("You: ").strip()
                if not user_input:
                    continue
                if user_input.lower() in ('quit', 'exit', 'q'):
                    break

                full_prompt = self._build_prompt(user_input)
                response = self._generate_reply(full_prompt)

                print(f"AI: {response}\n")

                self.performance_monitor.increment_message_count()
                self.conversation_manager.add_message("user", user_input)
                self.conversation_manager.add_message("assistant", response)

            except (KeyboardInterrupt, EOFError):
                # EOFError: input() hit end-of-file; end the session cleanly
                # instead of crashing with a traceback.
                break

        print(f"\nSession complete. Messages: {self.performance_monitor.get_session_stats()['total_messages']}")


if __name__ == "__main__":
    # Script entry point: build the assistant with its default model path
    # and hand control to the interactive chat loop.
    Stack2_9Enhanced().chat()