""" Lightweight Multi-Model AI Backend for Hugging Face Gradio Space Optimized for FREE CPU tier - No GPU required """ import gc import os import torch import gradio as gr from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline from PIL import Image, ImageDraw import numpy as np import base64 from io import BytesIO # ===== DEVICE CONFIGURATION ===== device = "cpu" torch.set_num_threads(4) os.environ['TOKENIZERS_PARALLELISM'] = 'false' # ===== MODEL MANAGER ===== class ModelManager: def __init__(self): self.chat_model = None self.chat_tokenizer = None self.summarizer_pipeline = None def load_chat_model(self): if self.chat_model is None: print("Loading TinyLlama...") model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" self.chat_tokenizer = AutoTokenizer.from_pretrained(model_name) self.chat_model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype=torch.float32, device_map=device, low_cpu_mem_usage=True ) self.chat_model.eval() gc.collect() return self.chat_model, self.chat_tokenizer def load_summarizer(self): if self.summarizer_pipeline is None: print("Loading FLAN-T5...") self.summarizer_pipeline = pipeline( "summarization", model="google/flan-t5-small", framework="pt", device=-1 ) gc.collect() return self.summarizer_pipeline model_manager = ModelManager() # ===== GENERATION FUNCTIONS ===== def chat_fn(prompt, max_tokens, temperature): try: max_tokens = min(int(max_tokens), 200) model, tokenizer = model_manager.load_chat_model() inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512) inputs = {k: v.to(device) for k, v in inputs.items()} with torch.no_grad(): outputs = model.generate( **inputs, max_new_tokens=max_tokens, temperature=temperature, top_p=0.9, do_sample=True, pad_token_id=tokenizer.eos_token_id ) response = tokenizer.decode(outputs[0], skip_special_tokens=True) del inputs, outputs gc.collect() return response except Exception as e: return f"Error: {str(e)}" def code_fn(prompt, max_tokens, temperature): try: max_tokens = min(int(max_tokens), 300) model, tokenizer = model_manager.load_chat_model() code_prompt = f"Generate Python code: {prompt}" inputs = tokenizer(code_prompt, return_tensors="pt", truncation=True, max_length=512) inputs = {k: v.to(device) for k, v in inputs.items()} with torch.no_grad(): outputs = model.generate( **inputs, max_new_tokens=max_tokens, temperature=max(temperature, 0.1), top_p=0.95, do_sample=True, pad_token_id=tokenizer.eos_token_id ) response = tokenizer.decode(outputs[0], skip_special_tokens=True) del inputs, outputs gc.collect() return response except Exception as e: return f"Error: {str(e)}" def summarize_fn(text, max_length): try: if len(text.strip()) < 50: return "Text too short (min 50 chars)" text = text[:1000] if len(text) > 1000 else text summarizer = model_manager.load_summarizer() summary = summarizer(text, max_length=min(int(max_length), 150), min_length=20, do_sample=False) gc.collect() return summary[0]['summary_text'] except Exception as e: return f"Error: {str(e)}" def image_fn(prompt, width, height): try: width, height = min(int(width), 256), min(int(height), 256) seed = abs(hash(prompt)) % (2**32) np.random.seed(seed) torch.manual_seed(seed) img = Image.new('RGB', (width, height), color=(255, 255, 255)) pixels = img.load() for y in range(height): for x in range(width): r = int((np.sin(x / 50 + seed) * 127) + 128) g = int((np.cos(y / 50 + seed * 0.5) * 127) + 128) b = int((np.sin((x + y) / 100 + seed * 0.7) * 127) + 128) pixels[x, y] = (r, g, b) buffered = BytesIO() img.save(buffered, format="PNG") img_str = base64.b64encode(buffered.getvalue()).decode() return f"data:image/png;base64,{img_str}" except Exception as e: return f"Error: {str(e)}" # ===== GRADIO INTERFACE ===== # Create individual interfaces chat_demo = gr.Interface( fn=chat_fn, inputs=[ gr.Textbox(lines=3, label="Message"), gr.Slider(50, 200, 150, step=10, label="Max Tokens"), gr.Slider(0.1, 1.0, 0.7, step=0.1, label="Temperature") ], outputs=gr.Textbox(lines=10, label="Response"), title="💬 Chat" ) code_demo = gr.Interface( fn=code_fn, inputs=[ gr.Textbox(lines=3, label="Description"), gr.Slider(100, 300, 256, step=20, label="Max Tokens"), gr.Slider(0.1, 1.0, 0.3, step=0.1, label="Temperature") ], outputs=gr.Textbox(lines=10, label="Code"), title="💻 Code" ) summarize_demo = gr.Interface( fn=summarize_fn, inputs=[ gr.Textbox(lines=8, label="Text"), gr.Slider(20, 150, 100, step=10, label="Summary Length") ], outputs=gr.Textbox(lines=8, label="Summary"), title="📝 Summarize" ) image_demo = gr.Interface( fn=image_fn, inputs=[ gr.Textbox(label="Description"), gr.Slider(128, 256, 256, step=32, label="Width"), gr.Slider(128, 256, 256, step=32, label="Height") ], outputs=gr.Textbox(label="Image (Base64)"), title="🎨 Image" ) # Create tabbed interface demo = gr.TabbedInterface( [chat_demo, code_demo, summarize_demo, image_demo], tab_names=["💬 Chat", "💻 Code", "📝 Summarize", "🎨 Image"], title="🤖 Lightweight AI Backend" ) # ===== INITIALIZE AND RUN ===== if __name__ == "__main__": print("=" * 60) print("🚀 Lightweight AI Backend Starting...") print("=" * 60) print(f"Device: {device}") print(f"CPU Threads: {torch.get_num_threads()}") print("=" * 60) demo.queue(max_size=10, default_concurrency_limit=2) demo.launch(server_name="0.0.0.0", server_port=7860, share=False, show_error=True)