Spaces:
Running
Running
HalogenFlo committed on
Commit ·
3b36145
1
Parent(s): 3fe3127
Deploy TIC Multi-Task AI Hub
Browse files
- app.py +190 -0
- requirements.txt +7 -0
app.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import torch
|
| 3 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoImageProcessor, AutoModelForSequenceClassification, AutoModelForImageClassification, pipeline
|
| 4 |
+
from peft import PeftModel
|
| 5 |
+
from PIL import Image, ImageOps
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
# Run on the GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# --- Text emotion classifier -------------------------------------------------
print("Loading emotion tokenizer and model...")
emotion_model_name = "HalogenFlo/microsoft-deberta-v3-base-emotion-recognition"
emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)
emotion_model = emotion_model.to(device)
# Class names, indexed by logit position.
# NOTE(review): assumed to match the checkpoint's own label ids — confirm
# against the model config.
emotion_labels = [
    "sadness",
    "joy",
    "love",
    "anger",
    "fear",
    "surprise",
]
|
| 16 |
+
|
| 17 |
+
def predict_emotion(text):
    """Score ``text`` against every emotion label.

    Args:
        text: The string to classify.

    Returns:
        A dict mapping each name in ``emotion_labels`` to its softmax
        probability, ordered from most to least likely. An empty dict is
        returned when ``text`` is ``None`` or contains only whitespace.
    """
    # Nothing to classify: skip tokenization and the forward pass entirely.
    if text is None or not text.strip():
        return {}

    inputs = emotion_tokenizer(
        text, padding=True, truncation=True, return_tensors="pt"
    ).to(device)
    with torch.no_grad():
        outputs = emotion_model(**inputs)

    # Index 0 selects the scores of the single input text.
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0]

    results = {label: float(prob) for label, prob in zip(emotion_labels, probs)}
    return dict(sorted(results.items(), key=lambda item: item[1], reverse=True))
|
| 25 |
+
|
| 26 |
+
# --- Handwritten character classifier ----------------------------------------
print("Loading vit processor and model...")
emnist_model_name = "HalogenFlo/vit-emnist-byclass"
# The preprocessing config is loaded from the google/vit-base-patch16-224-in21k
# checkpoint, while the classifier weights come from emnist_model_name.
process = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
emnist_model = AutoModelForImageClassification.from_pretrained(emnist_model_name)
emnist_model = emnist_model.to(device)
# 62 single-character class names indexed by logit position:
# digits first, then uppercase letters, then lowercase letters.
emnist_labels = list(
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
)
|
| 37 |
+
def _to_emnist_rgb(pil_image):
    """Normalise a sketch into a 224x224 RGB image with light strokes on dark."""
    # Flatten any transparency onto an opaque white canvas of the same size.
    rgba_image = pil_image.convert("RGBA")
    white_bg = Image.new("RGBA", rgba_image.size, (255, 255, 255, 255))
    gray_image = Image.alpha_composite(white_bg, rgba_image).convert("L")

    # EMNIST models require white strokes on a black background, so a mostly
    # light image (mean grey level above 127) is inverted.
    if np.mean(np.array(gray_image)) > 127:
        gray_image = ImageOps.invert(gray_image)

    # Back to three channels for the Vision Transformer processor.
    return gray_image.convert("RGB").resize((224, 224))


def predict_character(image):
    """Classify a hand-drawn character and return the most likely labels.

    Args:
        image: Either an image object exposing ``convert`` or a dict (as
            produced by ``gr.Sketchpad``) whose ``"composite"`` entry holds
            that image. ``None`` is accepted.

    Returns:
        A dict mapping up to five names from ``emnist_labels`` to their
        softmax probabilities. An empty dict is returned when no image is
        supplied or when any step of the prediction raises.
    """
    if image is None:
        return {}

    # Extract the composite PIL image from the gr.Sketchpad dict.
    pil_image = image.get("composite") if isinstance(image, dict) else image
    if pil_image is None:
        return {}

    try:
        rgb_image = _to_emnist_rgb(pil_image)
        inputs = process(images=rgb_image, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = emnist_model(**inputs)

        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0]
        # Clamp k so topk cannot raise if the head exposes fewer than 5 classes.
        topk_probs, topk_idx = torch.topk(probs, min(5, probs.numel()))

        return {
            emnist_labels[int(idx.item())]: float(val.item())
            for val, idx in zip(topk_probs, topk_idx)
        }
    except Exception as e:
        # UI boundary: report the failure on stdout and show an empty result
        # instead of letting the exception propagate to the caller.
        print(f"Error predicting character: {e}")
        return {}
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
# --- Chat model: base causal LM wrapped with a fine-tuned adapter ------------
print("Loading llm processor and model...")
base_model_name = "Qwen/Qwen2.5-0.5B-Instruct"
llm_model_name = "HalogenFlo/qwen-2.5b-finetuned-qlora"
# The tokenizer is loaded from the base checkpoint, not from the adapter repo.
llm_tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForCausalLM.from_pretrained(base_model_name)
base_model = base_model.to(device)
# Wrap the base model with the adapter stored under llm_model_name.
llm_model = PeftModel.from_pretrained(base_model, llm_model_name)
|
| 93 |
+
|
| 94 |
+
def format_covert(text):
    """Wrap ``text`` as one user turn followed by an opened assistant turn.

    The result is four newline-separated pieces: the user header, the text,
    the end-of-turn marker and the assistant header. No newline is emitted
    after the assistant header.
    """
    pieces = [
        "<|im_start|>user",
        f"{text}",
        "<|im_end|>",
        "<|im_start|>assistant",
    ]
    return "\n".join(pieces)
|
| 96 |
+
|
| 97 |
+
def generate_text(message, history):
    """Generate a chat reply to ``message`` with the fine-tuned LLM.

    Args:
        message: The latest user message.
        history: Earlier turns supplied by the chat UI. It is not used, so
            every reply is generated from ``message`` alone.

    Returns:
        The decoded continuation with special tokens removed, or the string
        ``"Error: <details>"`` if any step raised.
    """
    try:
        inputs = llm_tokenizer(format_covert(message), return_tensors="pt").to(device)
        generation_options = {
            "max_length": 2048,  # per the transformers docs, counts prompt + reply tokens
            "do_sample": False,
            "repetition_penalty": 1.15,
            "eos_token_id": llm_tokenizer.eos_token_id,
            "pad_token_id": llm_tokenizer.pad_token_id,
        }
        with torch.no_grad():
            outputs = llm_model.generate(**inputs, **generation_options)

        # The output starts with the prompt tokens; keep only what follows them.
        prompt_length = inputs.input_ids.shape[-1]
        new_tokens = outputs[0][prompt_length:]
        return llm_tokenizer.decode(new_tokens, skip_special_tokens=True)
    except Exception as e:
        # UI boundary: log the failure and show it in the chat window.
        print(f"Error generating text: {e}")
        return f"Error: {str(e)}"
|
| 114 |
+
|
| 115 |
+
# Stylesheet handed to gr.Blocks: page colours plus the .main-title, .subtitle
# and .primary-btn classes referenced by the layout code.
custom_css = """
body, .gradio-container {
background: #0f172a !important;
color: #f1f5f9 !important;
}
.main-title {
color: #38bdf8;
text-align: center;
font-weight: 800;
font-size: 2.5rem;
margin-bottom: 0.5rem;
}
.subtitle {
text-align: center;
color: #94a3b8;
margin-bottom: 2rem;
}
.primary-btn {
background: linear-gradient(135deg, #3b82f6 0%, #8b5cf6 100%) !important;
border: none !important;
color: white !important;
font-weight: 600 !important;
border-radius: 12px !important;
}
"""
|
| 140 |
+
|
| 141 |
+
def _reset_emotion_tab():
    """Return the cleared values for the emotion textbox and its result label."""
    return "", None


def _reset_handwriting_tab():
    """Return the cleared values for the sketchpad and its result label."""
    return None, None


# Three-tab interface: emotion classification, handwriting recognition, chat.
# NOTE(review): the nesting below (button rows inside the input column, footer
# after the tabs) is reconstructed — confirm against the deployed layout.
with gr.Blocks(css=custom_css, title="TIC AI Hub") as demo:
    gr.HTML("<h1 class='main-title'>TIC Multi-Task AI Hub</h1>")
    gr.HTML("<p class='subtitle'>Experience 3 state-of-the-art AI models from the HF Mastery R&D roadmap</p>")

    with gr.Tabs():
        # Tab 1: Emotion Classifier
        with gr.TabItem("Emotion Classification"):
            gr.Markdown("### Analyze the emotion of English text using DeBERTa-v3")
            with gr.Row():
                with gr.Column():
                    txt_input = gr.Textbox(label="Enter English text to analyze", placeholder="Type something here...", lines=4)
                    with gr.Row():
                        clear_btn_e = gr.Button("Clear", elem_classes="secondary-btn")
                        submit_btn_e = gr.Button("Analyze", elem_classes="primary-btn")
                with gr.Column():
                    lbl_emotion = gr.Label(label="Emotion Probabilities", num_top_classes=6)

            submit_btn_e.click(fn=predict_emotion, inputs=txt_input, outputs=lbl_emotion)
            clear_btn_e.click(fn=_reset_emotion_tab, outputs=[txt_input, lbl_emotion])

        # Tab 2: Handwriting Recognition
        with gr.TabItem("Handwriting Recognition"):
            gr.Markdown("### Recognize handwritten characters and digits using ViT")
            with gr.Row():
                with gr.Column():
                    img_input = gr.Sketchpad(label="Draw a character on the sketchpad below", type="pil")
                    with gr.Row():
                        clear_btn_h = gr.Button("Clear", elem_classes="secondary-btn")
                        submit_btn_h = gr.Button("Predict", elem_classes="primary-btn")
                with gr.Column():
                    lbl_handwrite = gr.Label(label="Top 5 Predicted Characters", num_top_classes=5)

            submit_btn_h.click(fn=predict_character, inputs=img_input, outputs=lbl_handwrite)
            clear_btn_h.click(fn=_reset_handwriting_tab, outputs=[img_input, lbl_handwrite])

        # Tab 3: Chatbot
        with gr.TabItem("AI Chatbot"):
            gr.Markdown("### Interactive conversation with fine-tuned Qwen2.5 LLM")
            gr.ChatInterface(fn=generate_text)

    # Footer.
    gr.HTML("<div style='text-align: center; color: #64748b; font-size: 0.9rem; margin-top: 2rem;'>R&D Project developed by HalogenFlo</div>")


# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.0.0
transformers
torch
torchvision
pillow
numpy
sentencepiece
# app.py imports PeftModel from peft to load the fine-tuned adapter.
peft
|