Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer | |
| from peft import PeftModel | |
| from threading import Thread | |
# Hub identifiers: the base checkpoint, and the adapter that is applied on top of it.
BASE_MODEL = "Qwen/Qwen3-0.6B"
ADAPTER_ID = "Redhanuman/Shadow-0.7B"

print("π Loading Shadow Brain...")

# The tokenizer is loaded from the base checkpoint, not from the adapter repo.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# Half precision is only requested when CUDA is available; otherwise float32.
if torch.cuda.is_available():
    _load_dtype = torch.float16
else:
    _load_dtype = torch.float32

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=_load_dtype,
    device_map="auto",
)

# Wrap the base model with the adapter identified by ADAPTER_ID, then switch
# the combined model to evaluation mode since this script only runs inference.
model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
model.eval()
def _plain_text(content):
    """Return the text carried by one history entry's content, or ``""``.

    Strings are returned unchanged. A list/tuple of dict parts is reduced to
    the concatenation of their string ``"text"`` values (assumes newer Gradio
    releases may deliver content as such parts -- TODO confirm against the
    installed version). Anything else (e.g. file attachments) yields ``""``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        texts = (part.get("text") for part in content if isinstance(part, dict))
        return "".join(text for text in texts if isinstance(text, str))
    return ""


def _history_to_messages(history):
    """Convert chat history into a list of ``{"role", "content"}`` dicts.

    Two layouts are accepted:

    * pair style: each turn is a ``(user_msg, bot_msg)`` sequence;
    * message style: each turn is a dict with ``"role"`` and ``"content"``.

    Turns without usable text are skipped so that ``None`` or attachment-only
    entries never reach the chat template.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            text = _plain_text(turn.get("content"))
            if role in ("user", "assistant") and text:
                messages.append({"role": role, "content": text})
            continue
        # Pair style. Unpacking a dict here would silently yield its *keys*,
        # which is why dict turns are handled separately above.
        user_msg, bot_msg = turn
        user_text = _plain_text(user_msg)
        bot_text = _plain_text(bot_msg)
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if bot_text:
            messages.append({"role": "assistant", "content": bot_text})
    return messages


def predict(message, history):
    """Stream the model's reply to ``message`` given the prior ``history``.

    Args:
        message: The latest user message.
        history: Earlier turns of the conversation, either as
            ``(user_msg, bot_msg)`` pairs or as ``{"role", "content"}`` dicts.

    Yields:
        The reply text accumulated so far; each yielded string extends the
        previous one by the newly decoded chunk.
    """
    system_prompt = (
        "You are Shadow 0.7B, a reasoning AI created by Aman Kumar Pandey. "
        "Use <think> tags to plan logic before answering."
    )
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    # return_dict=True returns the attention mask together with the input ids,
    # so generate() does not have to guess which positions are padding.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=1024,
        # temperature/top_p only take effect when sampling is enabled; state
        # it explicitly instead of relying on the checkpoint's defaults.
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
    )

    def _run_generation():
        # If generate() fails, the streamer never receives its stop signal and
        # the consuming loop below would block forever; close the stream first.
        try:
            model.generate(**generation_kwargs)
        except Exception:
            streamer.end()
            raise

    # generate() blocks until finished, so it runs in a worker thread while
    # this generator relays decoded text as it becomes available.
    thread = Thread(target=_run_generation)
    thread.start()

    partial_message = ""
    for new_token in streamer:
        partial_message += new_token
        yield partial_message
    thread.join()
# Chat UI: only the callback and the example prompts are configured; every
# other ChatInterface option is left at its default for compatibility.
_example_prompts = (
    "Write a Python function to check for palindromes.",
    "If I have 3 apples and eat one, how many do I have?",
)

demo = gr.ChatInterface(
    predict,
    # Each example is passed as a single-element list, one list per prompt.
    examples=[[prompt] for prompt in _example_prompts],
)

if __name__ == "__main__":
    # Enable the request queue, then start serving the app.
    queued_demo = demo.queue()
    queued_demo.launch()