import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Model checkpoint: a Hugging Face Hub repo id; from_pretrained() downloads
# and caches the weights locally on first use
MODEL_PATH = "gokul-pv/Llama-3.2-1B-Instruct-16bit-CodeArchitect"

_MODEL_CACHE = None

def load_model():
    """Load the model and tokenizer once and reuse them; without caching,
    every Gradio request would reload the full checkpoint from disk."""
    global _MODEL_CACHE
    if _MODEL_CACHE is None:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_PATH,
            torch_dtype=torch.float32,  # float32 is the safe dtype on CPU
            device_map="cpu"  # ensure the model runs on CPU
        )
        _MODEL_CACHE = (model, tokenizer)
    return _MODEL_CACHE
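
# A 1B-parameter model in float32 needs roughly 4-5 GB of RAM on CPU. If
# memory is tight, loading with torch_dtype=torch.bfloat16 roughly halves
# that (an option, not part of the original app; slower on some CPUs).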

class CustomTextStreamer:
    """Custom streamer that captures only the model's response"""
    def __init__(self, tokenizer):
        self.tokenizer = tokenizer
        self.generated_text = []
        self.next_tokens_are_prompt = True
        
    def put(self, value):
        if isinstance(value, torch.Tensor):
            if len(value.shape) > 1:
                value = value[0]
            decoded_text = self.tokenizer.decode(value.tolist(), skip_special_tokens=True)
        else:
            decoded_text = value
            
        if self.next_tokens_are_prompt:
            self.next_tokens_are_prompt = False  # Skip prompt tokens
        else:
            self.generated_text.append(decoded_text)
            print(decoded_text, end="", flush=True)
    
    def end(self):
        self.next_tokens_are_prompt = True
        print("")
    
    def get_generated_text(self):
        return "".join(self.generated_text)

def analyze_architecture(code_input, temperature=1.5, max_tokens=512):
    """
    Analyze architecture code using the loaded model
    """
    model, tokenizer = load_model()
    
    messages = [
        {
            "role": "system",
            "content": "You are an expert in analyzing system architecture written using code. "
                      "You check the architecture and provide clear and detailed explanations "
                      "regarding how the architecture can be improved for better performance, "
                      "scalability, maintainability, and cost-effectiveness. You also check "
                      "for possible cybersecurity issues and if the components can be "
                      "replaced with newer and better components."
        },
        {
            "role": "user",
            "content": code_input
        }
    ]
    
    # Render the messages with the model's chat template and tokenize.
    # add_generation_prompt=True appends the assistant header so generation
    # starts at the model's turn; return_dict=True also returns the
    # attention mask that generate() expects
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True
    ).to("cpu")  # keep tensors on CPU to match the model
    
    # Initialize text streamer
    text_streamer = CustomTextStreamer(tokenizer)
    
    # Generate the response; do_sample=True is required, otherwise generate()
    # falls back to greedy decoding and ignores temperature/min_p
    with torch.inference_mode():
        model.generate(
            **inputs,
            streamer=text_streamer,
            max_new_tokens=int(max_tokens),  # Gradio sliders deliver floats
            use_cache=True,
            do_sample=True,
            temperature=temperature,
            min_p=0.1  # min-p sampling needs a recent transformers release
        )
    
    return text_streamer.get_generated_text()
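
# The handler above returns the whole answer at once, so the Gradio UI only
# updates when generation finishes. A minimal sketch of true UI streaming
# (assuming transformers' TextIteratorStreamer; untested here):
#
#     from threading import Thread
#     from transformers import TextIteratorStreamer
#
#     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
#     Thread(target=model.generate, kwargs=dict(**inputs, streamer=streamer,
#            max_new_tokens=int(max_tokens), do_sample=True)).start()
#     partial = ""
#     for chunk in streamer:  # yield partial text from a generator handler
#         partial += chunk
#         yield partial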

# Create Gradio interface
def create_gradio_interface():
    with gr.Blocks() as demo:
        gr.Markdown("# Code Architect")
        
        with gr.Row():
            with gr.Column():
                code_input = gr.Code(
                    label="Input Architecture Code",
                    language="python",
                    lines=10
                )
                
                with gr.Row():
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=2.0,
                        value=1.5,
                        label="Temperature"
                    )
                    max_tokens = gr.Slider(
                        minimum=64,
                        maximum=2048,
                        value=512,
                        step=64,
                        label="Max Tokens"
                    )
                
                submit_btn = gr.Button("Analyze Architecture")
            
            with gr.Column():
                output = gr.Markdown(label="Analysis Results")
        
        submit_btn.click(
            fn=analyze_architecture,
            inputs=[code_input, temperature, max_tokens],
            outputs=output
        )
    
    return demo
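
# Long CPU generations can exceed default request timeouts; calling
# demo.queue() before launch() routes requests through Gradio's queue,
# which is the usual mitigation (an option, not enabled in the original app).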

if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch(
        share=True,  # also create a temporary public *.gradio.live link
        server_name="0.0.0.0",  # Listen on all network interfaces
        server_port=7860  # Default Gradio port
    )