File size: 7,947 Bytes
e32498e
 
 
 
a9b927e
 
e32498e
 
 
 
a9b927e
e32498e
 
2367226
a9b927e
e32498e
 
2367226
e32498e
a9b927e
e32498e
f4cab5c
e32498e
 
 
2367226
e32498e
 
94156c7
2367226
a9b927e
94156c7
e32498e
 
a9b927e
 
 
2367226
a9b927e
e32498e
 
 
 
2367226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e32498e
 
 
 
 
 
 
 
 
 
 
 
2367226
 
a9b927e
e32498e
a9b927e
 
e32498e
 
2367226
e32498e
2367226
 
a9b927e
 
e32498e
 
 
 
 
2367226
 
e32498e
2367226
e32498e
 
 
 
 
 
a9b927e
 
 
e32498e
a9b927e
2367226
 
e32498e
 
 
 
 
 
 
 
 
 
 
2367226
 
a9b927e
2367226
e32498e
 
2367226
e32498e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9b927e
e32498e
 
 
 
 
 
 
2367226
e32498e
 
 
2367226
a9b927e
f4cab5c
e32498e
 
 
a9b927e
 
e32498e
 
 
2367226
a9b927e
e32498e
 
 
a9b927e
 
e32498e
 
 
 
 
 
2367226
e32498e
 
a9b927e
e32498e
 
 
2367226
 
 
 
a9b927e
e32498e
 
 
 
2367226
 
 
 
 
a9b927e
2367226
 
 
 
 
e32498e
 
 
 
 
 
 
a9b927e
 
e32498e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9b927e
 
 
 
f4cab5c
e32498e
f4cab5c
 
 
a9b927e
f4cab5c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import re
import gc
import os

# Global variables for model and tokenizer
model = None  # AutoModelForCausalLM instance, populated by load_model()
tokenizer = None  # AutoTokenizer paired with the model, populated by load_model()
model_loaded = False  # set True by load_model() on success; checked before every generation

def load_model():
    """Load the model and tokenizer optimized for CPU.

    Populates the module-level ``model``, ``tokenizer`` and ``model_loaded``
    globals. On a primary-load failure, retries once with a minimal argument
    set before giving up.

    Returns:
        bool: True if either load attempt succeeded, False otherwise.
    """
    global model, tokenizer, model_loaded
    
    try:
        print("Loading AEGIS Conduct Economic Analysis Model for CPU...")
        
        # Load tokenizer first
        tokenizer = AutoTokenizer.from_pretrained(
            "Gaston895/aegisconduct",
            trust_remote_code=True
        )
        
        # Load model for CPU. Use float32: most CPU kernels have no float16
        # ("Half") implementation, so loading fp16 on CPU tends to crash at
        # generate() time (e.g. "addmm_impl_cpu_ not implemented for 'Half'").
        model = AutoModelForCausalLM.from_pretrained(
            "Gaston895/aegisconduct",
            torch_dtype=torch.float32,  # CPU-safe dtype (fp16 is GPU-oriented)
            device_map="cpu",  # Force CPU usage
            trust_remote_code=True,
            low_cpu_mem_usage=True
        )
        
        # Free loader scratch memory before the first inference
        gc.collect()
        
        print("Model loaded successfully on CPU!")
        model_loaded = True
        return True
        
    except Exception as e:
        print(f"Error loading model: {e}")
        # Fallback to basic loading with default dtype/device handling
        try:
            print("Trying fallback loading method...")
            model = AutoModelForCausalLM.from_pretrained(
                "Gaston895/aegisconduct",
                trust_remote_code=True,
                low_cpu_mem_usage=True
            )
            print("Model loaded with fallback method!")
            model_loaded = True
            return True
        except Exception as e2:
            print(f"Fallback also failed: {e2}")
            model_loaded = False
            return False

def format_response(text):
    """Strip hidden reasoning and normalize whitespace in a model reply.

    Removes any ``<thinking>...</thinking>`` sections, collapses runs of
    blank lines down to a single blank line, and trims surrounding
    whitespace.
    """
    # Drop chain-of-thought sections the model may emit
    visible = re.sub(r'<thinking>.*?</thinking>', '', text, flags=re.DOTALL)
    # Squeeze consecutive blank lines into one paragraph break
    collapsed = re.sub(r'\n\s*\n', '\n\n', visible)
    return collapsed.strip()

def generate_response(message, history, temperature=0.7, max_tokens=128):
    """Generate a reply from the loaded model, optimized for CPU inference.

    Args:
        message: The current user message.
        history: List of (user, assistant) string pairs from earlier turns.
        temperature: Sampling temperature (higher = more random).
        max_tokens: Cap on newly generated tokens.

    Returns:
        str: The cleaned assistant reply, or a human-readable error/status
        message (this function never raises).
    """
    global model, tokenizer, model_loaded
    
    if not model_loaded or model is None or tokenizer is None:
        return "Model is loading... Please wait a moment and try again."
    
    try:
        # Build a very short conversation context: only the last 2 exchanges
        # are kept, to bound memory use and CPU time. Slicing handles
        # len(history) <= 2 on its own, no length check needed.
        recent_history = history[-2:]
        conversation = ""
        for user_msg, assistant_msg in recent_history:
            conversation += f"User: {user_msg}\nAssistant: {assistant_msg}\n\n"
        
        # Add current message
        conversation += f"User: {message}\nAssistant:"
        
        # Strict prompt-length cap keeps CPU tokenization/attention cheap
        inputs = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=512)
        prompt_len = inputs["input_ids"].shape[1]
        
        # Generate response with CPU-optimized settings
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                do_sample=True,  # sampling, not greedy; num_beams=1 just avoids beam-search cost
                top_p=0.9,
                top_k=50,
                repetition_penalty=1.1,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                use_cache=True,
                num_beams=1
            )
        
        # Decode only the newly generated tokens. Slicing token ids is robust,
        # unlike string-slicing the full decode by len(conversation), which
        # breaks whenever decode() does not reproduce the prompt byte-for-byte
        # (special tokens, whitespace normalization, or a truncated prompt).
        response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
        
        # Format and clean response
        response = format_response(response)
        
        # Encourage prompt reclamation of generation buffers on small hosts
        gc.collect()
        
        return response if response else "I apologize, but I couldn't generate a proper response. Please try rephrasing your question."
        
    except Exception as e:
        return f"Error generating response: {str(e)}. Please try a shorter question."

def chat_interface(message, history, temperature, max_tokens):
    """Route one user turn through the model and record the exchange.

    Blank/whitespace-only messages are ignored. Returns the (possibly
    updated) history plus an empty string to clear the input box.
    """
    if message.strip():
        reply = generate_response(message, history, temperature, max_tokens)
        history.append((message, reply))
    return history, ""

# Create Gradio interface. NOTE: component creation order inside these
# context managers determines on-screen layout — do not reorder.
with gr.Blocks(title="AEGIS Conduct - Economic Analysis Chat") as demo:
    
    # Header / usage notes shown above the chat area
    gr.Markdown("""

    # 🤖 AEGIS Conduct - Economic Analysis Chat

    

    Chat with an AI model specialized in economic and financial analysis. This model features:

    - **Thinking Mode**: Automatic activation for complex reasoning

    - **Economic Expertise**: Specialized knowledge in finance, markets, and policy

    - **CPU Optimized**: Running efficiently on CPU hardware

    

    Ask questions about economics, finance, market analysis, policy impacts, and more!

    

    **Note**: This is a CPU-optimized version. Responses may take a moment to generate.

    """)
    
    with gr.Row():
        # Left column (wider): chat transcript, input box, action buttons
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(
                height=400,
                show_label=False
            )
            
            msg = gr.Textbox(
                placeholder="Ask me about economics, finance, markets... (keep questions concise for faster responses)",
                show_label=False
            )
            
            with gr.Row():
                submit_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("Clear Chat")
        
        # Right column (narrower): generation settings and help text
        with gr.Column(scale=1):
            gr.Markdown("### Settings")
            
            # Sampling temperature passed through to generate_response
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature"
            )
            
            # max_new_tokens cap passed through to generate_response
            max_tokens = gr.Slider(
                minimum=32,
                maximum=256,
                value=128,
                step=32,
                label="Max Response Length"
            )
            
            gr.Markdown("""

            ### Example Questions

            - What causes inflation?

            - Explain interest rates

            - How do markets work?

            - What is GDP?

            - Define recession

            

            ### CPU Optimization

            - Responses limited to 128 tokens for speed

            - Only recent conversation used

            - Optimized for CPU processing

            - Keep questions concise

            """)
    
    # Event handlers
    def submit_message(message, history, temp, max_tok):
        """Thin adapter binding Gradio event args onto chat_interface."""
        return chat_interface(message, history, temp, max_tok)
    
    def clear_chat():
        """Reset transcript and input box, reclaiming memory eagerly."""
        # Force garbage collection when clearing
        gc.collect()
        return [], ""
    
    # Bind events: button click and textbox Enter both submit the message
    submit_btn.click(
        submit_message,
        inputs=[msg, chatbot, temperature, max_tokens],
        outputs=[chatbot, msg]
    )
    
    msg.submit(
        submit_message,
        inputs=[msg, chatbot, temperature, max_tokens],
        outputs=[chatbot, msg]
    )
    
    clear_btn.click(
        clear_chat,
        outputs=[chatbot, msg]
    )

# Load model on startup (runs at import time so the Space warms up before
# the first request; failures are logged by load_model, not raised)
print("Initializing AEGIS Conduct Chat Interface...")
load_model()

# Launch configuration
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces (required in hosted containers)
        server_port=7860,  # default Gradio/HF Spaces port
        share=False
    )