import gradio as gr
import torch
from functools import lru_cache
from transformers import AutoTokenizer, AutoModelForCausalLM

# Model repository on the Hugging Face Hub
MODEL_PATH = "gokul-pv/Llama-3.2-1B-Instruct-16bit-CodeArchitect"


@lru_cache(maxsize=1)
def load_model():
    """Load the model and tokenizer once and cache them for subsequent calls."""
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        torch_dtype=torch.float32,  # Use float32 for CPU
        device_map="cpu",           # Ensure the model runs on CPU
    )
    return model, tokenizer
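
# Optional sketch (not part of the original app): select dtype and device from
# the available hardware instead of hard-coding CPU + float32, e.g.:
#
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   dtype = torch.float16 if device == "cuda" else torch.float32
#   model = AutoModelForCausalLM.from_pretrained(
#       MODEL_PATH, torch_dtype=dtype, device_map=device
#   )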

class CustomTextStreamer:
    """Custom streamer that captures only the model's response."""

    def __init__(self, tokenizer):
        self.tokenizer = tokenizer
        self.generated_text = []
        self.next_tokens_are_prompt = True

    def put(self, value):
        # generate() passes token-id tensors; decode them to text first
        if isinstance(value, torch.Tensor):
            if len(value.shape) > 1:
                value = value[0]
            decoded_text = self.tokenizer.decode(value.tolist(), skip_special_tokens=True)
        else:
            decoded_text = value
        if self.next_tokens_are_prompt:
            self.next_tokens_are_prompt = False  # Skip the prompt tokens
        else:
            self.generated_text.append(decoded_text)
            print(decoded_text, end="", flush=True)

    def end(self):
        self.next_tokens_are_prompt = True
        print("")

    def get_generated_text(self):
        return "".join(self.generated_text)

def analyze_architecture(code_input, temperature=1.5, max_tokens=512):
    """Analyze architecture code using the loaded model."""
    model, tokenizer = load_model()
    messages = [
        {
            "role": "system",
            "content": "You are an expert in analyzing system architecture written using code. "
                       "You check the architecture and provide clear and detailed explanations "
                       "regarding how the architecture can be improved for better performance, "
                       "scalability, maintainability, and cost-effectiveness. You also check "
                       "for possible cybersecurity issues and whether the components can be "
                       "replaced with newer and better components."
        },
        {
            "role": "user",
            "content": code_input
        }
    ]

    # Tokenize the conversation with the model's chat template
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to("cpu")  # Ensure tensors are on CPU

    # Initialize the text streamer
    text_streamer = CustomTextStreamer(tokenizer)

    # Generate the response
    with torch.inference_mode():
        model.generate(
            input_ids=inputs,
            streamer=text_streamer,
            max_new_tokens=max_tokens,
            use_cache=True,
            do_sample=True,  # Required for temperature/min_p to take effect
            temperature=temperature,
            min_p=0.1
        )

    return text_streamer.get_generated_text()
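
# Example usage with a hypothetical input (commented out so it does not run
# at import time; useful for quick testing outside the Gradio UI):
#
#   sample = "lb = LoadBalancer(); web = [Server() for _ in range(2)]"
#   print(analyze_architecture(sample, temperature=0.7, max_tokens=256))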

# Create the Gradio interface
def create_gradio_interface():
    with gr.Blocks() as demo:
        gr.Markdown("# Code Architect")
        with gr.Row():
            with gr.Column():
                code_input = gr.Code(
                    label="Input Architecture Code",
                    language="python",
                    lines=10
                )
                with gr.Row():
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=2.0,
                        value=1.5,
                        label="Temperature"
                    )
                    max_tokens = gr.Slider(
                        minimum=64,
                        maximum=2048,
                        value=512,
                        step=64,
                        label="Max Tokens"
                    )
                submit_btn = gr.Button("Analyze Architecture")
            with gr.Column():
                output = gr.Markdown(label="Analysis Results")
        submit_btn.click(
            fn=analyze_architecture,
            inputs=[code_input, temperature, max_tokens],
            outputs=output
        )
    return demo
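
# Note: CustomTextStreamer prints tokens to the server console as they are
# generated; the Gradio UI receives the full analysis only after generate()
# returns. Streaming partial text into the UI would require turning
# analyze_architecture into a generator that yields intermediate output.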

if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch(
        share=True,             # Enable sharing via a public link
        server_name="0.0.0.0",  # Listen on all network interfaces
        server_port=7860        # Default Gradio port
    )