Spaces:

Gaston895
/

simple

Sleeping

App Files Files Community

simple / app.py

Gaston895

Upload app.py

94156c7 verified 23 days ago

raw

history blame contribute delete

7.95 kB

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import re
	import gc
	import os

	# Module-level state shared by load_model() (writer) and generate_response() (reader).
	# Everything starts out "not loaded"; load_model() fills these in.
	model = None  # causal LM returned by AutoModelForCausalLM.from_pretrained, once loaded
	tokenizer = None  # tokenizer returned by AutoTokenizer.from_pretrained, once loaded
	model_loaded = False  # set to True by load_model() after a successful load

	def load_model(model_id="Gaston895/aegisconduct"):
	    """Load the tokenizer and model on CPU and publish them as module globals.

	    The first attempt loads the model in float16 pinned to the CPU. If
	    anything in that attempt raises, a fallback attempt reloads with the
	    library defaults. The fallback also loads the tokenizer when the first
	    attempt failed before the tokenizer was available, so a "successful"
	    load never leaves ``tokenizer`` unset.

	    Args:
	        model_id: Identifier passed to ``from_pretrained`` for both the
	            tokenizer and the model.

	    Returns:
	        True when both tokenizer and model were loaded (``model_loaded`` is
	        set to True); False when both attempts failed (``model_loaded`` is
	        set to False and ``model``/``tokenizer`` are left untouched).
	    """
	    global model, tokenizer, model_loaded

	    # Work on locals and only publish to the globals once everything needed
	    # for generation is available.
	    loaded_tokenizer = None
	    loaded_model = None

	    try:
	        print("Loading AEGIS Conduct Economic Analysis Model for CPU...")

	        # Load tokenizer first
	        loaded_tokenizer = AutoTokenizer.from_pretrained(
	            model_id,
	            trust_remote_code=True,
	        )

	        # NOTE(review): float16 halves the memory footprint, but half-precision
	        # CPU kernels may be slow or unavailable depending on the PyTorch
	        # version - confirm on the target runtime.
	        loaded_model = AutoModelForCausalLM.from_pretrained(
	            model_id,
	            torch_dtype=torch.float16,
	            device_map="cpu",  # Force CPU usage
	            trust_remote_code=True,
	            low_cpu_mem_usage=True,
	        )

	        # Release temporaries created while loading the checkpoint.
	        gc.collect()

	        print("Model loaded successfully on CPU!")
	    except Exception as e:
	        print(f"Error loading model: {e}")
	        # Fallback to basic loading
	        try:
	            print("Trying fallback loading method...")
	            if loaded_tokenizer is None:
	                # The primary attempt failed before the tokenizer existed;
	                # without it generate_response() could never run.
	                loaded_tokenizer = AutoTokenizer.from_pretrained(
	                    model_id,
	                    trust_remote_code=True,
	                )
	            loaded_model = AutoModelForCausalLM.from_pretrained(
	                model_id,
	                trust_remote_code=True,
	                low_cpu_mem_usage=True,
	            )
	            print("Model loaded with fallback method!")
	        except Exception as e2:
	            print(f"Fallback also failed: {e2}")
	            model_loaded = False
	            return False

	    tokenizer = loaded_tokenizer
	    model = loaded_model
	    model_loaded = True
	    return True

	def format_response(text):
	    """Return *text* with thinking sections removed and whitespace tidied.

	    Everything between ``<thinking>`` and ``</thinking>`` (tags included,
	    spanning newlines) is dropped, runs of blank/whitespace-only lines are
	    collapsed to a single empty line, and the result is stripped.
	    """
	    without_thinking = re.sub(
	        r'<thinking>.*?</thinking>', '', text, flags=re.DOTALL
	    )
	    collapsed = re.sub(r'\n\s*\n', '\n\n', without_thinking)
	    return collapsed.strip()

	def generate_response(message, history, temperature=0.7, max_tokens=128):
	    """Generate the assistant reply for *message* using the loaded model.

	    The prompt is built from at most the last two (user, assistant) pairs
	    of *history* followed by the current message, tokenized with a
	    512-token cap, and completed by sampling.

	    Args:
	        message: The current user message.
	        history: Sequence of ``(user_msg, assistant_msg)`` pairs; ``None``
	            is treated as empty.
	        temperature: Sampling temperature passed to ``model.generate``.
	        max_tokens: Maximum number of new tokens to generate (coerced to
	            ``int`` because UI sliders may deliver a float).

	    Returns:
	        The cleaned reply text, a fixed apology when the reply is empty, a
	        "loading" notice when the model is not ready, or an error message
	        string if anything raises during generation.
	    """
	    # Only reads the module-level model state, so no ``global`` statement is needed.
	    if not model_loaded or model is None or tokenizer is None:
	        return "Model is loading... Please wait a moment and try again."

	    try:
	        # Build conversation context (keep it very short for CPU):
	        # only the last 2 exchanges are used to save memory and time.
	        prompt_parts = [
	            f"User: {user_msg}\nAssistant: {assistant_msg}\n\n"
	            for user_msg, assistant_msg in (history or [])[-2:]
	        ]
	        prompt_parts.append(f"User: {message}\nAssistant:")
	        conversation = "".join(prompt_parts)

	        # Tokenize input with strict length limit for CPU.
	        # NOTE(review): with the tokenizer's default truncation side an
	        # over-long prompt presumably loses its tail (the current question);
	        # consider left-side truncation - confirm against the tokenizer config.
	        inputs = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=512)
	        prompt_token_count = inputs["input_ids"].shape[-1]

	        # Generate response with CPU-optimized settings
	        with torch.no_grad():
	            outputs = model.generate(
	                **inputs,
	                max_new_tokens=int(max_tokens),
	                temperature=temperature,
	                do_sample=True,
	                top_p=0.9,
	                top_k=50,
	                repetition_penalty=1.1,
	                pad_token_id=tokenizer.eos_token_id,
	                eos_token_id=tokenizer.eos_token_id,
	                use_cache=True,
	                num_beams=1,  # single beam (no beam search); decoding is sampled, not greedy
	            )

	        # Decode only the newly generated tokens. Slicing the decoded text by
	        # len(conversation) is unreliable: decoding does not always reproduce
	        # the prompt character-for-character, and a truncated prompt is
	        # shorter than the original string.
	        generated_ids = outputs[0][prompt_token_count:]
	        response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

	        # Format and clean response
	        response = format_response(response)

	        # Clean up memory after generation
	        gc.collect()

	        if response:
	            return response
	        return "I apologize, but I couldn't generate a proper response. Please try rephrasing your question."

	    except Exception as e:
	        # UI boundary: surface the failure as chat text instead of crashing the app.
	        return f"Error generating response: {str(e)}. Please try a shorter question."

	def chat_interface(message, history, temperature, max_tokens):
	    """Handle one chat submission and return ``(history, "")``.

	    A blank (whitespace-only) message leaves *history* untouched. Otherwise
	    the generated reply is appended to *history* in place as a
	    ``(message, reply)`` pair. The empty string in the result clears the
	    input textbox.
	    """
	    if message.strip():
	        reply = generate_response(message, history, temperature, max_tokens)
	        history.append((message, reply))

	    return history, ""

	# Build the Gradio UI: a chat column (history, input box, buttons) next to
	# a settings column (sampling sliders and usage hints).
	with gr.Blocks(title="AEGIS Conduct - Economic Analysis Chat") as demo:

	    gr.Markdown("""
	# 🤖 AEGIS Conduct - Economic Analysis Chat

	Chat with an AI model specialized in economic and financial analysis. This model features:
	- Thinking Mode: Automatic activation for complex reasoning
	- Economic Expertise: Specialized knowledge in finance, markets, and policy
	- CPU Optimized: Running efficiently on CPU hardware

	Ask questions about economics, finance, market analysis, policy impacts, and more!

	Note: This is a CPU-optimized version. Responses may take a moment to generate.
	""")

	    with gr.Row():
	        # Left: conversation area
	        with gr.Column(scale=4):
	            chatbot = gr.Chatbot(height=400, show_label=False)

	            msg = gr.Textbox(
	                placeholder="Ask me about economics, finance, markets... (keep questions concise for faster responses)",
	                show_label=False,
	            )

	            with gr.Row():
	                submit_btn = gr.Button("Send", variant="primary")
	                clear_btn = gr.Button("Clear Chat")

	        # Right: generation settings and hints
	        with gr.Column(scale=1):
	            gr.Markdown("### Settings")

	            temperature = gr.Slider(
	                minimum=0.1, maximum=1.0, value=0.7, step=0.1,
	                label="Temperature",
	            )

	            max_tokens = gr.Slider(
	                minimum=32, maximum=256, value=128, step=32,
	                label="Max Response Length",
	            )

	            gr.Markdown("""
	### Example Questions
	- What causes inflation?
	- Explain interest rates
	- How do markets work?
	- What is GDP?
	- Define recession

	### CPU Optimization
	- Responses limited to 128 tokens for speed
	- Only recent conversation used
	- Optimized for CPU processing
	- Keep questions concise
	""")

	    # Event handlers
	    def submit_message(message, history, temp, max_tok):
	        """Forward a submission to chat_interface and return its result."""
	        return chat_interface(message, history, temp, max_tok)

	    def clear_chat():
	        """Reset the chat history and the input box."""
	        # Force garbage collection when clearing
	        gc.collect()
	        return [], ""

	    # Bind events: the Send button and pressing Enter in the textbox share
	    # the same handler, inputs and outputs.
	    chat_inputs = [msg, chatbot, temperature, max_tokens]
	    chat_outputs = [chatbot, msg]

	    for trigger in (submit_btn.click, msg.submit):
	        trigger(submit_message, inputs=chat_inputs, outputs=chat_outputs)

	    clear_btn.click(clear_chat, outputs=chat_outputs)

	# Load the model at import time so it is ready before the first request.
	print("Initializing AEGIS Conduct Chat Interface...")
	load_model()

	# Start the web server only when this file is run as a script.
	if __name__ == "__main__":
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": False,
	    }
	    demo.launch(**launch_options)