Chompi10 commited on
Commit
5a6e020
·
verified ·
1 Parent(s): c517f97

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -0
app.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import warnings
3
+ import torch
4
+ import gradio as gr
5
+ from transformers import AutoTokenizer, AutoModelForCausalLM
# ==========================
# CONFIG
# ==========================

MODEL_NAME = "sshleifer/tiny-gpt2"   # tiny model so the Space runs on CPU
MAX_NEW_TOKENS = 300                 # generation budget per reply
MAX_INPUT_CHARS = 4000               # hard cap on the user's message size
MAX_HISTORY = 5                      # chat turns kept in the prompt

# ==========================
# SETTINGS
# ==========================

warnings.filterwarnings("ignore")
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Inference-only app: disable autograd globally to save memory and compute.
torch.set_grad_enabled(False)
# BUG FIX: os.cpu_count() is documented to return None when the count is
# undeterminable; the original `os.cpu_count() // 2` would then raise
# TypeError at import time. Fall back so we still end up with >= 1 thread.
torch.set_num_threads(max(1, (os.cpu_count() or 2) // 2))

device = "cuda" if torch.cuda.is_available() else "cpu"
print("🔄 Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# GPT-2-style tokenizers ship without a pad token; reuse EOS so that
# generation with padding works. (Independent of model loading, so done here.)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("🔄 Loading model...")
model = (
    AutoModelForCausalLM
    .from_pretrained(MODEL_NAME, low_cpu_mem_usage=True)
    .to(device)
)
model.eval()  # inference mode: disables dropout etc.

print("✅ Model loaded successfully.")
# ==========================
# PROMPTS
# ==========================

# System prompt for the "Generar Código" (code generation) mode.
GENERATOR_PROMPT = """
You are a Senior Software Engineer.
Generate clean, secure, optimized production-ready code.
Explain briefly what the code does.
"""

# System prompt for the "Revisión Ultra Crítica" (strict review) mode.
# The Markdown headings tell the model the exact report sections to emit.
ULTRA_REVIEW_PROMPT = """
You are a Principal Software Architect and Security Engineer.
Perform a strict professional review.

Return:

## Executive Summary
## Critical Bugs
## Security Issues
## Architecture Problems
## Performance Issues
## Code Quality Problems
## Refactored Version
## Final Verdict
"""

# System prompt for the "Análisis Arquitectura" (architecture analysis) mode;
# used as the fallback branch in build_prompt().
ARCHITECTURE_ANALYSIS_PROMPT = """
You are a Senior Software Architect.
Perform deep architectural analysis.

Return:

## Architecture Overview
## Structural Problems
## Scalability Evaluation
## Resilience Evaluation
## Improvements
## Final Verdict
"""
# ==========================
# PROMPT BUILDER
# ==========================

def build_prompt(mode, user_input, history):
    """Assemble the full text prompt sent to the model.

    `mode` picks the system prompt; the review and architecture modes also
    prefix the user's message with a task marker. Only the last MAX_HISTORY
    (user, assistant) pairs of `history` are included.
    """
    if mode == "Revisión Ultra Crítica":
        system_prompt = ULTRA_REVIEW_PROMPT
        user_input = f"Deep review:\n\n{user_input}"
    elif mode == "Generar Código":
        system_prompt = GENERATOR_PROMPT
    else:
        # Any other mode falls back to architecture analysis.
        system_prompt = ARCHITECTURE_ANALYSIS_PROMPT
        user_input = f"Deep architectural analysis:\n\n{user_input}"

    parts = [system_prompt + "\n\n"]

    # Keep only the most recent turns so the prompt stays within budget.
    for past_user, past_assistant in history[-MAX_HISTORY:]:
        parts.append(f"User: {past_user}\nAssistant: {past_assistant}\n")

    parts.append(f"User: {user_input}\nAssistant:")

    return "".join(parts)
# ==========================
# GENERATION
# ==========================

def generate_text(prompt):
    """Run greedy generation on `prompt` and return only the new text.

    The prompt tokens are sliced off the output so only the model's reply
    is decoded. Torch/device errors (e.g. CUDA OOM) propagate as
    RuntimeError for the caller to handle.
    """
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,      # clip over-long prompts to the model window
        max_length=1024
    ).to(device)

    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            # BUG FIX: the original passed temperature=0.3 and top_p=0.9
            # alongside do_sample=False; those sampling knobs are ignored
            # during greedy decoding and make transformers emit warnings,
            # so they are removed.
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )

    # Slice off the prompt so only newly generated tokens are decoded.
    generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]

    return tokenizer.decode(
        generated_tokens,
        skip_special_tokens=True
    ).strip()
# ==========================
# CHAT
# ==========================

def chat(user_input, history, mode):
    """Gradio chat callback: validate input, build the prompt, generate.

    Always returns a string — either the model's reply or a user-facing
    warning — and never raises, so the UI stays responsive.
    """
    # BUG FIX: also reject whitespace-only messages; the original only
    # caught the empty string and forwarded blanks to the model.
    if not user_input or not user_input.strip():
        return "⚠️ Empty input."

    if len(user_input) > MAX_INPUT_CHARS:
        return "⚠️ Input too large."

    try:
        prompt = build_prompt(mode, user_input, history)
        return generate_text(prompt)
    except RuntimeError as e:
        # Typically a CUDA OOM or other torch device error.
        return f"⚠️ Generation error: {str(e)}"
    except Exception:
        # Last-resort boundary guard: never leak a traceback into the chat.
        return "⚠️ Unexpected error occurred."
# ==========================
# UI
# ==========================

with gr.Blocks(theme=gr.themes.Soft()) as demo:

    gr.Markdown("# 🧠 Federico - Lightweight CTO Assistant")

    # Mode selector; its current value is forwarded to chat() as an
    # additional input by the ChatInterface below.
    mode_choices = [
        "Generar Código",
        "Revisión Ultra Crítica",
        "Análisis Arquitectura",
    ]
    mode = gr.Radio(
        choices=mode_choices,
        value=mode_choices[1],
        label="Modo",
    )

    gr.ChatInterface(
        fn=chat,
        additional_inputs=[mode],
        chatbot=gr.Chatbot(height=500),
    )

# Bind to all interfaces on the standard Hugging Face Spaces port.
demo.launch(server_name="0.0.0.0", server_port=7860)