Srikar00007 committed
Commit 172997b · verified · 1 Parent(s): d5cfe67

Update app.py

Files changed (1)
  1. app.py +67 -231
app.py CHANGED
@@ -1,235 +1,71 @@
- # import gradio as gr
- # from huggingface_hub import InferenceClient
-
-
- # def respond(
- #     message,
- #     history: list[dict[str, str]],
- #     system_message,
- #     max_tokens,
- #     temperature,
- #     top_p,
- #     hf_token: gr.OAuthToken,
- # ):
- #     """
- #     For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- #     """
- #     client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-
- #     messages = [{"role": "system", "content": system_message}]
-
- #     messages.extend(history)
-
- #     messages.append({"role": "user", "content": message})
-
- #     response = ""
-
- #     for message in client.chat_completion(
- #         messages,
- #         max_tokens=max_tokens,
- #         stream=True,
- #         temperature=temperature,
- #         top_p=top_p,
- #     ):
- #         choices = message.choices
- #         token = ""
- #         if len(choices) and choices[0].delta.content:
- #             token = choices[0].delta.content
-
- #         response += token
- #         yield response
-
-
- # """
- # For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- # """
- # chatbot = gr.ChatInterface(
- #     respond,
- #     type="messages",
- #     additional_inputs=[
- #         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
- #         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
- #         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
- #         gr.Slider(
- #             minimum=0.1,
- #             maximum=1.0,
- #             value=0.95,
- #             step=0.05,
- #             label="Top-p (nucleus sampling)",
- #         ),
- #     ],
- # )
-
- # with gr.Blocks() as demo:
- #     with gr.Sidebar():
- #         gr.LoginButton()
- #     chatbot.render()
-
-
- # if __name__ == "__main__":
- #     demo.launch()
-
- # Medical Image Chatbot Application
- # app.py
- from ultralytics import YOLO
- # Imports for the Vision-Language Model (VLM)
- from transformers import AutoProcessor, AutoModelForCausalLM
  import gradio as gr
- from PIL import Image
- import torch
- import os
-
- # Global variable to store the exact error if model loading fails
- LLAVA_LOAD_ERROR = None
-
- # -----------------------------
- # 1. Load models
- # -----------------------------
- # YOLO Model Loading (requires 'best.pt' in the same directory)
- try:
-     yolo_model = YOLO("best.pt")
- except FileNotFoundError:
-     print("WARNING: 'best.pt' not found. App will use a dummy classification for structure testing.")
-     # Dummy class for structure testing if best.pt is missing
-     class DummyYOLO:
-         def __call__(self, image):
-             class_names = {0: "Normal", 1: "Pneumonia", 2: "Fracture"}
-             class DummyResult:
-                 def __init__(self):
-                     self.names = class_names
-                     # Simulating a top-1 prediction for class 1 (Pneumonia)
-                     self.probs = type('Obj', (object,), {'top1': 1})
-             return [DummyResult()]
-     yolo_model = DummyYOLO()
-
- # Load LLaVA-Med VLM (Processor and Model)
- LLAVA_MODEL_ID = "microsoft/llava-med-v1.5-mistral-7b"
- print(f"Loading VLM: {LLAVA_MODEL_ID}. This may take some time...")
-
- try:
-     # Determine device: Use GPU if available, otherwise CPU
-     device = "cuda" if torch.cuda.is_available() else "cpu"
-     print(f"Using device: {device}")
-
-     llava_processor = AutoProcessor.from_pretrained(LLAVA_MODEL_ID)
-
-     # Load model with optimizations for large models
-     llava_model = AutoModelForCausalLM.from_pretrained(
-         LLAVA_MODEL_ID,
-         torch_dtype=torch.float16,
-         device_map="auto"  # Tries to intelligently place model parts (necessary for large models)
-     )
-     # Ensure the model is moved to the determined device
-     llava_model.to(device)
-
- except Exception as e:
-     # Store the specific error message to display it in the UI
-     LLAVA_LOAD_ERROR = str(e)
-     print(f"CRITICAL ERROR: Failed to load LLaVA-Med model. Error: {LLAVA_LOAD_ERROR}")
-     llava_processor = None
-     llava_model = None
-
-
- # -----------------------------
- # 2. Define main logic
- # -----------------------------
- def medical_chat(image_path, question):
-     if image_path is None:
-         return "⚠️ Please upload a medical image before asking a question."
-
-     # Load image from filepath provided by Gradio (type="filepath" in gr.Image)
-     try:
-         image = Image.open(image_path)
-     except Exception:
-         return "❌ Could not open the image file from the path provided by Gradio."
-
-     # YOLO classification
-     try:
-         results = yolo_model(image)
-         if hasattr(results[0], 'probs') and results[0].probs is not None and results[0].names:
-             predicted_class = results[0].names[int(results[0].probs.top1)]
-         else:
-             predicted_class = "Unknown class (YOLO detection failed to find a simple classification result)."
-     except Exception as e:
-         predicted_class = f"YOLO analysis failed (Error: {e})."
-         print(f"YOLO Error: {e}")
-
-     yolo_output = f"The YOLO model classified the image as: **{predicted_class}**."
-
-     # Direct YOLO question
-     if "output of yolo" in question.lower():
-         return yolo_output
-
-     # Explanation using LLaVA-Med (VLM)
-     if llava_model is None or llava_processor is None:
-         # Return the specific error details and troubleshooting tips
-         troubleshooting_tips = (
-             "**Troubleshooting:**\n"
-             "1. **Memory:** LLaVA-Med V1.5 is a 7B parameter model, requiring significant RAM/VRAM. \n"
-             "2. **Dependencies:** Ensure all libraries in `requirements.txt` (`accelerate`, `bitsandbytes`, `safetensors`) are installed.\n"
-             "3. **Alternative:** If running on a CPU-only or low-memory machine, consider switching to a smaller text-only model like BioGPT-Large."
-         )
-         return (
-             f"{yolo_output}\n\n---\n\n"
-             f"❌ **CRITICAL ERROR: LLaVA-Med model failed to load.**\n"
-             f"**Specific Error:** {LLAVA_LOAD_ERROR or 'No specific error message captured.'}\n\n"
-             f"{troubleshooting_tips}"
-         )
-
-     # LLaVA Prompting (multimodal input structure)
-     llava_prompt = (
-         f"USER: <image>\n"
-         f"The medical image was classified by a separate model as '{predicted_class}'. "
-         f"Based on the visual evidence in the image and this classification, {question} ASSISTANT:"
-     )
-
-     try:
-         # Prepare inputs and move to device
-         inputs = llava_processor(text=llava_prompt, images=image, return_tensors="pt")
-         inputs = {k: v.to(llava_model.device) for k, v in inputs.items()}
-
-         # Generate response
-         output = llava_model.generate(
-             **inputs,
-             max_new_tokens=200,
-             do_sample=True,
-             temperature=0.7,
-         )
-
-         # Decode and clean up output
-         answer = llava_processor.decode(output[0], skip_special_tokens=True)
-         # We only want the ASSISTANT's response
-         clean_answer = answer.split("ASSISTANT:")[-1].strip()
-
-     except Exception as e:
-         clean_answer = f"LLaVA-Med failed during response generation. Error: {e}"
-
-     return f"{yolo_output}\n\n---\n\n**Explanation (Powered by LLaVA-Med VLM):** {clean_answer}"
-
- # -----------------------------
- # 3. Build custom UI using Blocks (Custom multimodal layout)
- # -----------------------------
- with gr.Blocks(title="🧠 YOLO + Medical Chatbot (LLaVA-Med)") as demo:
-     gr.Markdown("# 🩺 Medical Image Analyzer & Chatbot (YOLO + LLaVA-Med VLM)")
-     gr.Markdown("Upload an image. YOLO classifies it, and LLaVA-Med uses the image and classification result to provide an expert-level explanation.")
-
-     with gr.Row():
-         with gr.Column(scale=1):
-             image_input = gr.Image(type="filepath", label="🩻 Upload Medical Image", height=300)
-             text_input = gr.Textbox(
-                 label="💬 Ask your question",
-                 placeholder="e.g., What is the output of YOLO? or Explain this disease and its symptoms."
-             )
-             submit_btn = gr.Button("Analyze & Ask", variant="primary")
-
-         with gr.Column(scale=2):
-             output_text = gr.Textbox(label="🧠 Combined Response", lines=10, interactive=False)
-
-     # Trigger the analysis function when the button is clicked
-     submit_btn.click(fn=medical_chat, inputs=[image_input, text_input], outputs=output_text)
-
- # -----------------------------
- # 4. Launch the app
- # -----------------------------
  if __name__ == "__main__":
-     demo.launch()
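Aside: the core pattern the removed file implemented was chaining a YOLO top-1 classification label into a LLaVA-style USER/ASSISTANT prompt. A minimal standalone sketch of that chaining step, with assumptions labeled: yolov8n-cls.pt is a stand-in classification checkpoint (the commit's own code loads a local best.pt), and the blank PIL image is a placeholder so the sketch runs without a real scan.

from ultralytics import YOLO
from PIL import Image

# Stand-in classification checkpoint (assumption); the removed app loaded "best.pt".
model = YOLO("yolov8n-cls.pt")

# Placeholder input so the sketch runs without a real medical image on disk.
image = Image.new("RGB", (224, 224))
results = model(image)

probs = results[0].probs  # only populated for classification checkpoints
if probs is not None:
    predicted_class = results[0].names[int(probs.top1)]
    # Splice the label into the LLaVA-style prompt, as medical_chat() did.
    prompt = (
        "USER: <image>\n"
        f"The medical image was classified by a separate model as '{predicted_class}'. "
        "Based on the visual evidence in the image and this classification, "
        "explain the finding. ASSISTANT:"
    )
    print(prompt)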
  import gradio as gr
+ from huggingface_hub import InferenceClient
+
+
+ def respond(
+     message,
+     history: list[dict[str, str]],
+     system_message,
+     max_tokens,
+     temperature,
+     top_p,
+     hf_token: gr.OAuthToken,
+ ):
+     """
+     For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+     """
+     client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
+
+     messages = [{"role": "system", "content": system_message}]
+
+     messages.extend(history)
+
+     messages.append({"role": "user", "content": message})
+
+     response = ""
+
+     for message in client.chat_completion(
+         messages,
+         max_tokens=max_tokens,
+         stream=True,
+         temperature=temperature,
+         top_p=top_p,
+     ):
+         choices = message.choices
+         token = ""
+         if len(choices) and choices[0].delta.content:
+             token = choices[0].delta.content
+
+         response += token
+         yield response
+
+
+ """
+ For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+ """
+ chatbot = gr.ChatInterface(
+     respond,
+     type="messages",
+     additional_inputs=[
+         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(
+             minimum=0.1,
+             maximum=1.0,
+             value=0.95,
+             step=0.05,
+             label="Top-p (nucleus sampling)",
+         ),
+     ],
+ )
+
+ with gr.Blocks() as demo:
+     with gr.Sidebar():
+         gr.LoginButton()
+     chatbot.render()
+
+
  if __name__ == "__main__":
+     demo.launch()
+
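Note on the new file: respond is a generator, and each yield hands Gradio the cumulative reply so far, which is what produces the token-by-token streaming in the ChatInterface. A minimal sketch of driving it outside the UI, under stated assumptions: the new file is saved as app.py, and a valid Hugging Face token sits in a hypothetical HF_TOKEN environment variable (SimpleNamespace stands in for gr.OAuthToken, since respond only reads its .token attribute).

import os
from types import SimpleNamespace

from app import respond  # assumes the new file is saved as app.py

# Stand-in for gr.OAuthToken; respond() only accesses .token.
fake_token = SimpleNamespace(token=os.environ["HF_TOKEN"])  # HF_TOKEN is an assumption

partial = ""
for partial in respond(
    message="Hello!",
    history=[],
    system_message="You are a friendly Chatbot.",
    max_tokens=64,
    temperature=0.7,
    top_p=0.95,
    hf_token=fake_token,
):
    pass  # each iteration yields the growing cumulative response

print(partial)  # the final accumulated reply

Accumulating into one string and yielding the whole prefix each time (rather than yielding per-token deltas) matches what gr.ChatInterface expects from a streaming callback.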