yuhos16 commited on
Commit
9fb76f8
·
verified ·
1 Parent(s): dae35d7

Upload folder using huggingface_hub

Browse files
inference/.ipynb_checkpoints/app-checkpoint.py ADDED
@@ -0,0 +1,423 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import uvicorn
3
+ import os
4
+ import shutil
5
+ import uuid
6
+ import json
7
+ import re
8
+ import asyncio
9
+ from typing import Optional
10
+ from io import BytesIO
11
+ from contextlib import asynccontextmanager
12
+ from PIL import Image
13
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
14
+ from fastapi.middleware.cors import CORSMiddleware
15
+ from fastapi.responses import StreamingResponse
16
+ from fastapi.concurrency import run_in_threadpool
17
+ from model_utils import SkinGPTModel
18
+ from deepseek_service import get_deepseek_service, DeepSeekService
19
+
20
# === Configuration ===
MODEL_PATH = "../checkpoint"
TEMP_DIR = "./temp_uploads"
os.makedirs(TEMP_DIR, exist_ok=True)

# DeepSeek API key.
# SECURITY FIX: the previous revision hardcoded a live API key as the
# fallback value for this environment lookup. Credentials must never be
# committed to source control; the key is now read exclusively from the
# environment (the leaked key should be rotated). When unset, the DeepSeek
# service reports "API key not provided" at load time and the app falls
# back to returning raw model results.
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")

# Global DeepSeek service instance (created in init_deepseek() at startup).
deepseek_service: Optional[DeepSeekService] = None
30
+
31
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan manager: startup/shutdown hooks for FastAPI."""
    # Startup: initialize the DeepSeek refinement service.
    await init_deepseek()
    yield
    # Shutdown: code after `yield` runs when the server stops serving.
    print("\nShutting down service...")
38
+
39
# FastAPI application; `lifespan` wires the startup/shutdown hooks above.
app = FastAPI(
    title="SkinGPT-R1 皮肤诊断系统",
    description="智能皮肤诊断助手",
    version="1.0.0",
    lifespan=lifespan
)

# CORS configuration - allow frontend access.
# NOTE(review): combining "*" with allow_credentials=True is not honored by
# Starlette's CORSMiddleware (the wildcard is not echoed back when
# credentials are allowed), and the explicit localhost origins make the
# wildcard redundant anyway - confirm whether "*" is actually needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:3000", "http://localhost:5173", "http://127.0.0.1:5173", "*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global in-memory session state (single-process only; lost on restart):
# chat_states: conversation history per session (list of Qwen-style messages)
# pending_images: uploaded-but-not-yet-consumed image paths (state id -> path)
chat_states = {}
pending_images = {}
60
+
61
def parse_diagnosis_result(raw_text: str) -> dict:
    """
    Parse <think>/<answer> tags out of a raw diagnosis string.

    Handles truncated output (unclosed tags) and tag-free text, and strips a
    leading "Final Answer:" marker from the answer if present.

    Parameters:
        raw_text: raw diagnosis text emitted by the model.

    Returns:
        dict with keys:
            thinking: content of <think>...</think> (or unclosed <think>), or None
            answer:   content of <answer>...</answer>, or a cleaned fallback
            raw:      the original input, unchanged
    """
    # NOTE: the redundant function-local `import re` was removed; `re` is
    # already imported at module level.

    # Try to match fully closed tags first.
    think_match = re.search(r'<think>([\s\S]*?)</think>', raw_text)
    answer_match = re.search(r'<answer>([\s\S]*?)</answer>', raw_text)

    thinking = None
    answer = None

    # <think> tag: fall back to an unclosed tag (truncated output).
    if think_match:
        thinking = think_match.group(1).strip()
    else:
        unclosed_think = re.search(r'<think>([\s\S]*?)(?=<answer>|$)', raw_text)
        if unclosed_think:
            thinking = unclosed_think.group(1).strip()

    # <answer> tag: fall back to an unclosed tag.
    if answer_match:
        answer = answer_match.group(1).strip()
    else:
        unclosed_answer = re.search(r'<answer>([\s\S]*?)$', raw_text)
        if unclosed_answer:
            answer = unclosed_answer.group(1).strip()

    # Still no answer: clean the raw text and use it as the answer.
    if not answer:
        # Remove all tags and their (think) contents.
        cleaned = re.sub(r'<think>[\s\S]*?</think>', '', raw_text)
        cleaned = re.sub(r'<think>[\s\S]*', '', cleaned)  # unclosed <think>
        cleaned = re.sub(r'</?answer>', '', cleaned)      # stray answer tags
        cleaned = cleaned.strip()
        answer = cleaned if cleaned else raw_text

    # Strip any leftover tags from both parts.
    if answer:
        answer = re.sub(r'</?think>|</?answer>', '', answer).strip()
    if thinking:
        thinking = re.sub(r'</?think>|</?answer>', '', thinking).strip()

    # Handle the "Final Answer:" format, keeping only what follows it.
    if answer:
        final_answer_match = re.search(r'Final Answer:\s*([\s\S]*)', answer, re.IGNORECASE)
        if final_answer_match:
            answer = final_answer_match.group(1).strip()

    return {
        "thinking": thinking if thinking else None,
        "answer": answer,
        "raw": raw_text
    }
124
+
125
print("Initializing Model Service...")
# Load the SkinGPT model once at import time; the instance is shared by all
# request handlers.
gpt_model = SkinGPTModel(MODEL_PATH)
print("Service Ready.")

# Initialize the DeepSeek service (async; invoked from the lifespan hook).
async def init_deepseek():
    """Create the global DeepSeek refinement service and report readiness."""
    global deepseek_service
    print("\nInitializing DeepSeek service...")
    deepseek_service = await get_deepseek_service(api_key=DEEPSEEK_API_KEY)
    if deepseek_service and deepseek_service.is_loaded:
        print("DeepSeek service is ready!")
    else:
        # App still works without DeepSeek: raw model output is returned.
        print("DeepSeek service not available, will return raw results")
139
+
140
@app.post("/v1/upload/{state_id}")
async def upload_file(state_id: str, file: UploadFile = File(...), survey: str = Form(None)):
    """
    Receive an image upload for a session.

    Saves the file into the local temp directory and marks the session
    (state_id) as having one pending image, which the next /v1/predict call
    will consume.
    """
    try:
        # Derive the extension from the uploaded filename; default to jpg.
        ext = file.filename.split(".")[-1] if "." in file.filename else "jpg"
        dest_path = os.path.join(TEMP_DIR, f"{state_id}_{uuid.uuid4().hex}.{ext}")

        # Persist the upload to disk.
        with open(dest_path, "wb") as sink:
            shutil.copyfileobj(file.file, sink)

        # Remember the path until the next predict call consumes it.
        # Single-image mode: a newer upload simply replaces the previous one.
        pending_images[state_id] = dest_path

        # Ensure the session has a conversation history entry.
        chat_states.setdefault(state_id, [])

        return {"message": "Image uploaded successfully", "path": dest_path}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}")
167
+
168
@app.post("/v1/predict/{state_id}")
async def v1_predict(request: Request, state_id: str):
    """
    Accept a text message and run inference for the given session.

    If the session has a pending uploaded image (from /v1/upload), the image
    is combined with the text into a single multimodal user turn.

    Raises:
        HTTPException 400 on malformed JSON or a missing 'message' field;
        HTTPException 500 on inference failure (the user turn is rolled back).
    """
    try:
        data = await request.json()
    except Exception:
        # BUGFIX: was a bare `except:`, which would also swallow
        # SystemExit/KeyboardInterrupt/CancelledError.
        raise HTTPException(status_code=400, detail="Invalid JSON")

    user_message = data.get("message", "")
    if not user_message:
        raise HTTPException(status_code=400, detail="Missing 'message' field")

    # Fetch (or start) the conversation history for this session.
    history = chat_states.get(state_id, [])

    # Build the content list for the current user turn.
    current_content = []

    # 1. Attach a freshly uploaded image, if any (consumed exactly once).
    if state_id in pending_images:
        img_path = pending_images.pop(state_id)  # take and remove
        current_content.append({"type": "image", "image": img_path})

        # Prepend the system prompt on the very first turn.
        if not history:
            system_prompt = "You are a professional AI dermatology assistant. "
            user_message = f"{system_prompt}\n\n{user_message}"

    # 2. Add the text part.
    current_content.append({"type": "text", "text": user_message})

    # 3. Record the user turn.
    history.append({"role": "user", "content": current_content})
    chat_states[state_id] = history

    # 4. Run inference in the threadpool so the event loop is not blocked.
    try:
        response_text = await run_in_threadpool(
            gpt_model.generate_response,
            messages=history
        )
    except Exception as e:
        # Roll back the failed user turn so a retry starts clean.
        chat_states[state_id].pop()
        raise HTTPException(status_code=500, detail=f"Inference error: {str(e)}")

    # 5. Record the assistant turn.
    history.append({"role": "assistant", "content": [{"type": "text", "text": response_text}]})
    chat_states[state_id] = history

    return {"message": response_text}
222
+
223
@app.post("/v1/reset/{state_id}")
async def reset_chat(state_id: str):
    """Clear all session state: conversation history and any pending image."""
    if state_id in chat_states:
        del chat_states[state_id]
    if state_id in pending_images:
        # Best-effort removal of the temp file: a missing or locked file is
        # not worth failing the reset over.
        # BUGFIX: was a bare `except:` (would also swallow KeyboardInterrupt
        # and SystemExit); only filesystem errors are ignored now.
        try:
            os.remove(pending_images[state_id])
        except OSError:
            pass
        del pending_images[state_id]
    return {"message": "Chat history reset"}
236
+
237
@app.get("/")
async def root():
    """Service metadata for the root endpoint."""
    info = {
        "name": "SkinGPT-R1 皮肤诊断系统",
        "version": "1.0.0",
        "status": "running",
        "description": "智能皮肤诊断助手",
    }
    return info
246
+
247
@app.get("/health")
async def health_check():
    """Liveness probe for load balancers / uptime monitors."""
    status = {"status": "healthy", "model_loaded": True}
    return status
254
+
255
@app.post("/diagnose/stream")
async def diagnose_stream(
    image: Optional[UploadFile] = File(None),
    text: str = Form(...),
    language: str = Form("zh"),
):
    """
    SSE streaming diagnosis endpoint (used by the frontend).

    Accepts an optional image plus text, streams model deltas as SSE `data:`
    events, then emits one final event carrying the structured result
    (optionally refined through the DeepSeek API).

    Parameters:
        image: optional uploaded skin image.
        text: user's question / description.
        language: "zh" or "en"; anything else falls back to "zh".

    Returns:
        StreamingResponse with media type "text/event-stream".
    """
    from queue import Queue, Empty
    from threading import Thread

    # Only "zh" and "en" are supported.
    language = language if language in ("zh", "en") else "zh"

    # Decode the upload into a PIL image (RGB) if one was provided.
    pil_image = None
    temp_image_path = None

    if image:
        contents = await image.read()
        pil_image = Image.open(BytesIO(contents)).convert("RGB")

    # Queue used to hand chunks from the generation thread to the event loop.
    result_queue = Queue()
    # Holds the full response and parsed result produced by the worker thread.
    generation_result = {"full_response": [], "parsed": None, "temp_image_path": None}

    def run_generation():
        """Run streaming generation in a background thread."""
        full_response = []

        try:
            # Build the (single-turn) message list.
            messages = []
            current_content = []

            # Language-specific system prompt.
            system_prompt = "You are a professional AI dermatology assistant." if language == "en" else "你是一个专业的AI皮肤科助手。"

            # If an image was supplied, persist it to a temp file for the model.
            if pil_image:
                generation_result["temp_image_path"] = os.path.join(TEMP_DIR, f"temp_{uuid.uuid4().hex}.jpg")
                pil_image.save(generation_result["temp_image_path"])
                current_content.append({"type": "image", "image": generation_result["temp_image_path"]})

            # Add the text part.
            prompt = f"{system_prompt}\n\n{text}"
            current_content.append({"type": "text", "text": prompt})
            messages.append({"role": "user", "content": current_content})

            # Streaming generation - push every chunk onto the queue immediately.
            for chunk in gpt_model.generate_response_stream(
                messages=messages,
                max_new_tokens=2048,
                temperature=0.7
            ):
                full_response.append(chunk)
                result_queue.put(("delta", chunk))

            # Parse the completed response.
            response_text = "".join(full_response)
            parsed = parse_diagnosis_result(response_text)
            generation_result["full_response"] = full_response
            generation_result["parsed"] = parsed

            # Signal completion to the consumer coroutine.
            result_queue.put(("generation_done", None))

        except Exception as e:
            result_queue.put(("error", str(e)))

    async def event_generator():
        """Yield SSE events as chunks arrive from the worker thread."""
        # Start generation in a background thread (non-blocking).
        gen_thread = Thread(target=run_generation)
        gen_thread.start()

        # FIX: get_running_loop() is the correct call from inside a running
        # coroutine; get_event_loop() is deprecated here since Python 3.10.
        loop = asyncio.get_running_loop()

        # Drain the queue and forward deltas to the client.
        while True:
            try:
                # Blocking get with a short timeout, run off the event loop.
                msg_type, data = await loop.run_in_executor(
                    None,
                    lambda: result_queue.get(timeout=0.1)
                )

                if msg_type == "generation_done":
                    # Streaming finished; move on to the final payload.
                    break
                elif msg_type == "delta":
                    yield_chunk = json.dumps({"type": "delta", "text": data}, ensure_ascii=False)
                    yield f"data: {yield_chunk}\n\n"
                elif msg_type == "error":
                    yield f"data: {json.dumps({'type': 'error', 'message': data}, ensure_ascii=False)}\n\n"
                    gen_thread.join()
                    return

            except Empty:
                # Queue temporarily empty - keep waiting.
                await asyncio.sleep(0.01)
                continue

        gen_thread.join()

        # Fetch the parsed result prepared by the worker thread.
        parsed = generation_result["parsed"]
        if not parsed:
            yield f"data: {json.dumps({'type': 'error', 'message': 'Failed to parse response'}, ensure_ascii=False)}\n\n"
            return

        raw_thinking = parsed["thinking"]
        raw_answer = parsed["answer"]

        # Optionally refine the result via DeepSeek; fall back to raw output.
        refined_by_deepseek = False
        description = None
        thinking = raw_thinking
        answer = raw_answer

        if deepseek_service and deepseek_service.is_loaded:
            try:
                print(f"Calling DeepSeek to refine diagnosis (language={language})...")
                refined = await deepseek_service.refine_diagnosis(
                    raw_answer=raw_answer,
                    raw_thinking=raw_thinking,
                    language=language,
                )
                if refined["success"]:
                    description = refined["description"]
                    thinking = refined["analysis_process"]
                    answer = refined["diagnosis_result"]
                    refined_by_deepseek = True
                    print("DeepSeek refinement completed successfully")
            except Exception as e:
                print(f"DeepSeek refinement failed, using original: {e}")
        else:
            print("DeepSeek service not available, using raw results")

        success_msg = "Diagnosis completed" if language == "en" else "诊断完成"

        # Final payload shape kept in sync with the reference project.
        final_payload = {
            "description": description,                  # image description (from thinking)
            "thinking": thinking,                        # analysis process (refined)
            "answer": answer,                            # diagnosis result (refined)
            "raw": parsed["raw"],                        # raw model response
            "refined_by_deepseek": refined_by_deepseek,  # whether DeepSeek refined it
            "success": True,
            "message": success_msg
        }
        yield_final = json.dumps({"type": "final", "result": final_payload}, ensure_ascii=False)
        yield f"data: {yield_final}\n\n"

        # Clean up the temp image.
        # BUGFIX: was a bare `except:`; only filesystem errors are ignored.
        temp_path = generation_result.get("temp_image_path")
        if temp_path and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass

    return StreamingResponse(event_generator(), media_type="text/event-stream")
421
+
422
if __name__ == '__main__':
    # Run the API directly with uvicorn when executed as a script.
    uvicorn.run("app:app", host="0.0.0.0", port=5900, reload=False)
inference/.ipynb_checkpoints/chat-checkpoint.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # chat.py
2
+ import argparse
3
+ import os
4
+ from model_utils import SkinGPTModel
5
+
6
def main():
    """Interactive multi-turn CLI chat seeded with a single skin image."""
    parser = argparse.ArgumentParser(description="SkinGPT-R1 Multi-turn Chat")
    parser.add_argument("--model_path", type=str, default="../checkpoint")
    parser.add_argument("--image", type=str, required=True, help="Path to initial image")
    args = parser.parse_args()

    # Load the model once up front.
    bot = SkinGPTModel(args.model_path)

    # System instructions prepended to the first user turn.
    system_prompt = "You are a professional AI dermatology assistant. Analyze the skin condition carefully."

    # The image must exist before we start the session.
    if not args.image or not os.path.exists(args.image):
        print(f"Error: Image {args.image} not found.")
        return

    # Seed the conversation: one user turn carrying the image + prompt.
    first_turn = {
        "role": "user",
        "content": [
            {"type": "image", "image": args.image},
            {"type": "text", "text": f"{system_prompt}\n\nPlease analyze this image."},
        ],
    }
    history = [first_turn]

    print("\n=== SkinGPT-R1 Chat (Type 'exit' to quit) ===")
    print(f"Image loaded: {args.image}")

    # First diagnosis turn.
    print("\nModel is thinking...", end="", flush=True)
    reply = bot.generate_response(history)
    print(f"\rAssistant: {reply}\n")

    # Record the assistant's reply in the history.
    history.append({"role": "assistant", "content": [{"type": "text", "text": reply}]})

    # Follow-up turns until the user quits (exit/quit/Ctrl-C).
    while True:
        try:
            user_input = input("User: ")
            if user_input.lower() in ["exit", "quit"]:
                break
            if not user_input.strip():
                continue

            # Append the user's new question.
            history.append({"role": "user", "content": [{"type": "text", "text": user_input}]})

            print("Model is thinking...", end="", flush=True)
            reply = bot.generate_response(history)
            print(f"\rAssistant: {reply}\n")

            # Append the assistant's new reply.
            history.append({"role": "assistant", "content": [{"type": "text", "text": reply}]})

        except KeyboardInterrupt:
            break
66
+
67
if __name__ == "__main__":
    # Script entry point.
    main()
inference/.ipynb_checkpoints/deepseek_service-checkpoint.py ADDED
@@ -0,0 +1,384 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DeepSeek API Service
3
+ Used to optimize and organize SkinGPT model output results
4
+ """
5
+
6
+ import os
7
+ import re
8
+ from typing import Optional
9
+ from openai import AsyncOpenAI
10
+
11
+
12
class DeepSeekService:
    """DeepSeek API service used to polish and organize SkinGPT output."""

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize DeepSeek service.

        Parameters:
            api_key: DeepSeek API key, reads from environment variable if not provided
        """
        self.api_key = api_key or os.environ.get("DEEPSEEK_API_KEY")
        self.base_url = "https://api.deepseek.com"
        self.model = "deepseek-chat"  # Using deepseek-chat model

        self.client = None       # AsyncOpenAI-compatible client, created in load()
        self.is_loaded = False   # True once the client is ready

        print("DeepSeek API service initializing...")
        print(f"API Base URL: {self.base_url}")

    async def load(self):
        """Initialize DeepSeek API client."""
        try:
            if not self.api_key:
                print("DeepSeek API key not provided")
                self.is_loaded = False
                return

            # Initialize OpenAI compatible client
            self.client = AsyncOpenAI(
                api_key=self.api_key,
                base_url=self.base_url
            )

            self.is_loaded = True
            print("DeepSeek API service is ready!")

        except Exception as e:
            print(f"DeepSeek API service initialization failed: {e}")
            self.is_loaded = False

    async def refine_diagnosis(
        self,
        raw_answer: str,
        raw_thinking: Optional[str] = None,
        language: str = "zh"
    ) -> dict:
        """
        Use DeepSeek API to optimize and organize diagnosis results.

        Parameters:
            raw_answer: Original diagnosis result
            raw_thinking: AI thinking process
            language: Language option ("zh" or "en")

        Returns:
            Dictionary with "success", "description", "analysis_process",
            "diagnosis_result", "original_diagnosis" and (on success)
            "raw_refined" / (on failure) "error" keys. Never raises: failures
            fall back to the original texts.
        """

        if not self.is_loaded or self.client is None:
            error_msg = "API not initialized, cannot generate analysis" if language == "en" else "API未初始化,无法生成分析过程"
            print("DeepSeek API not initialized, returning original result")
            return {
                "success": False,
                "description": "",
                "analysis_process": raw_thinking or error_msg,
                "diagnosis_result": raw_answer,
                "original_diagnosis": raw_answer,
                "error": "DeepSeek API not initialized"
            }

        try:
            # Build prompt
            prompt = self._build_refine_prompt(raw_answer, raw_thinking, language)

            # Select system prompt based on language
            if language == "en":
                system_content = "You are a professional medical text editor. Your task is to polish and organize medical diagnostic text to make it flow smoothly while preserving the original meaning. Output ONLY the formatted result. Do NOT add any explanations, comments, or thoughts. Just follow the format exactly."
            else:
                system_content = "你是医学文本整理专家,按照用户要求将用户输入的文本整理成用户想要的格式,不要改写或总结。"

            # Call DeepSeek API (low temperature: formatting, not creativity)
            response = await self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": system_content},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.1,
                max_tokens=2048,
                top_p=0.8,
            )

            # Extract generated text
            generated_text = response.choices[0].message.content

            # Parse the three-section output
            parsed = self._parse_refined_output(generated_text, raw_answer, raw_thinking, language)

            return {
                "success": True,
                "description": parsed["description"],
                "analysis_process": parsed["analysis_process"],
                "diagnosis_result": parsed["diagnosis_result"],
                "original_diagnosis": raw_answer,
                "raw_refined": generated_text
            }

        except Exception as e:
            print(f"DeepSeek API call failed: {e}")
            error_msg = "API call failed, cannot generate analysis" if language == "en" else "API调用失败,无法生成分析过程"
            return {
                "success": False,
                "description": "",
                "analysis_process": raw_thinking or error_msg,
                "diagnosis_result": raw_answer,
                "original_diagnosis": raw_answer,
                "error": str(e)
            }

    def _build_refine_prompt(self, raw_answer: str, raw_thinking: Optional[str] = None, language: str = "zh") -> str:
        """
        Build the optimization prompt.

        Parameters:
            raw_answer: Original diagnosis result
            raw_thinking: AI thinking process
            language: Language option, "zh" for Chinese, "en" for English

        Returns:
            Built prompt
        """
        if language == "en":
            # English prompt - organize and polish while preserving meaning
            thinking_text = raw_thinking if raw_thinking else "No analysis process available."
            prompt = f"""You are a text organization expert. There are two texts that need to be organized. Text 1 is the thinking process of the SkinGPT model, and Text 2 is the diagnosis result given by SkinGPT.

【Requirements】
- Preserve the original tone and expression style
- Text 1 contains the thinking process, Text 2 contains the diagnosis result
- Extract the image observation part from the thinking process as Description. This should include all factual observations about what was seen in the image, not just a brief summary.
- For Diagnostic Reasoning: refine and condense the remaining thinking content. Remove redundancies, self-doubt, circular reasoning, and unnecessary repetition. Keep it concise and not too long. Keep the logical chain clear and enhance readability. IMPORTANT: DO NOT include any image description or visual observations in Diagnostic Reasoning. Only include reasoning, analysis, and diagnostic thought process.
- If [Text 1] content is NOT: No analysis process available. Then organize [Text 1] content accordingly, DO NOT confuse [Text 1] and [Text 2]
- If [Text 1] content IS: No analysis process available. Then extract the analysis process and description from [Text 2]
- DO NOT infer or add new medical information, DO NOT output any meta-commentary
- You may adjust unreasonable statements or remove redundant content to improve clarity

[Text 1]
{thinking_text}

[Text 2]
{raw_answer}

【Output】Only output three sections, do not output anything else:
## Description
(Extract all image observation content from the thinking process - include all factual descriptions of what was seen)

## Analysis Process
(Refined and condensed diagnostic reasoning: remove self-doubt, circular logic, and redundancies. Keep it concise and not too long. Keep logical flow clear. Do NOT include image observations)

## Diagnosis Result
(The organized diagnosis result from Text 2)

【Example】:
## Description
The image shows red inflamed patches on the skin with pustules and darker colored spots. The lesions appear as papules and pustules distributed across the affected area, with some showing signs of inflammation and possible post-inflammatory hyperpigmentation.

## Analysis Process
These findings are consistent with acne vulgaris, commonly seen during adolescence. The user's age aligns with typical onset for this condition. Treatment recommendations: over-the-counter medications such as benzoyl peroxide or topical antibiotics, avoiding picking at the skin, and consulting a dermatologist if severe. The goal is to control inflammation and prevent scarring.

## Diagnosis Result
Possible diagnosis: Acne (pimples) Explanation: Acne is a common skin condition, especially during adolescence, when hormonal changes cause overactive sebaceous glands, which can easily clog pores and form acne. Pathological care recommendations: 1. Keep face clean, wash face 2-3 times daily, use gentle cleansing products. 2. Avoid squeezing acne with hands to prevent worsening inflammation or leaving scars. 3. Avoid using irritating cosmetics and skincare products. 4. Can use topical medications containing salicylic acid, benzoyl peroxide, etc. 5. If necessary, can use oral antibiotics or other treatment methods under doctor's guidance. Precautions: 1. Avoid rubbing or damaging the affected area to prevent infection. 2. Eat less oily and spicy foods, eat more vegetables and fruits. 3. Maintain good rest habits, avoid staying up late. 4. If acne symptoms persist without improvement or show signs of worsening, seek medical attention promptly.
"""
        else:
            # Chinese prompt - translate to Simplified Chinese AND organize/polish.
            # BUGFIX: a mojibake character in the "分析过程" requirement line
            # ("不�?包含") was repaired to "不要包含", matching the English
            # prompt's "DO NOT include".
            thinking_text = raw_thinking if raw_thinking else "No analysis process available."
            prompt = f"""你是一个文本整理专家。有两段文本需要整理,文本1是SkinGPT模型的思考过程的文本,文本2是SkinGPT给出的诊断结果的文本。

【要求】
- 保留原文的语气和表达方式
- 文本1是思考过程,文本2是诊断结果
- 从思考过程中提取图像观察部分作为图像描述。需要包含所有关于图片中观察到的事实内容,不要简化或缩短。
- 对于分析过程:提炼并精简剩余的思考内容,去除冗余、自我怀疑、兜圈子的内容。保持简洁,不要太长。保持逻辑链条清晰,增强可读性。重要:分析过程中不要包含任何图像描述或视觉观察内容,只包含推理、分析和诊断思考过程。
- 如果【文本1】内容不是:No analysis process available.那么按要求整理【文本1】的内容,不要混淆【文本1】和【文本2】。
- 如果【文本1】内容是:No analysis process available.那么从【文本2】提炼分析过程和描述。
- 【文本1】和【文本2】需要翻译成简体中文
- 禁止推断或添加新的医学信息,禁止输出任何元评论
- 可以调整不合理的语句或去除冗余内容以提高清晰度


【文本1】
{thinking_text}

【文本2】
{raw_answer}

【输出】只输出三个部分,不要输出其他任何内容:
## 图像描述
(从思考过程中提取所有图像观察内容,包含所有关于图片的事实描述)

## 分析过程
(提炼并精简后的诊断推理:去除自我怀疑、兜圈逻辑和冗余内容。保持简洁,不要太长。保持逻辑流畅。不包含图像观察)

## 诊断结果
(整理后的诊断结果)

【样例】:
## 图像描述
图片显示皮肤上有红色发炎的斑块,伴有脓疱和颜色较深的斑点。病变表现为分布在受影响区域的丘疹和脓疱,部分显示出炎症迹象和可能的炎症后色素沉着。

## 分析过程
这些表现符合寻常痤疮的特征,青春期常见。用户的年龄与该病症的典型发病年龄相符。治疗建议:使用非处方药物如过氧化苯甲酰或外用抗生素,避免抠抓皮肤,病情严重时咨询皮肤科医生。目标是控制炎症并防止疤痕形成。

## 诊断结果
可能的诊断:痤疮(青春痘) 解释:痤疮是一种常见的皮肤病,特别是在青少年期间,由于激素水平的变化导致皮脂腺过度活跃,容易堵塞毛孔,形成痤疮。 病理护理建议:1.保持面部清洁,每天洗脸2-3次,使用温和的洁面产品。 2.避免用手挤压痤疮,以免加重炎症或留下疤痕。 3.避免使用刺激性的化妆品和护肤品。 4.可以使用含有水杨酸、苯氧醇等成分的外用药物治疗。 5.如有需要,可以在医生指导下使用抗生素口服药或其他治疗方法。 注意事项:1. 避免摩擦或损伤患处,以免引起感染。 2. 饮食上应少吃油腻、辛辣食物,多吃蔬菜水果。 3. 保持良好的作息习惯,避免熬夜。 4. 如果痤疮症状持续不见好转或有恶化的趋势,应及时就医。
"""

        return prompt

    def _parse_refined_output(
        self,
        generated_text: str,
        raw_answer: str,
        raw_thinking: Optional[str] = None,
        language: str = "zh"
    ) -> dict:
        """
        Parse DeepSeek generated output into its three sections.

        Parameters:
            generated_text: DeepSeek generated text
            raw_answer: Original diagnosis (as fallback)
            raw_thinking: Original thinking process (as fallback)
            language: Language option

        Returns:
            Dictionary containing description, analysis_process and diagnosis_result
        """
        description = ""
        analysis_process = None
        diagnosis_result = None

        if language == "en":
            # English section patterns
            desc_match = re.search(
                r'##\s*Description\s*\n([\s\S]*?)(?=##\s*Analysis\s*Process|$)',
                generated_text,
                re.IGNORECASE
            )
            analysis_match = re.search(
                r'##\s*Analysis\s*Process\s*\n([\s\S]*?)(?=##\s*Diagnosis\s*Result|$)',
                generated_text,
                re.IGNORECASE
            )
            result_match = re.search(
                r'##\s*Diagnosis\s*Result\s*\n([\s\S]*?)$',
                generated_text,
                re.IGNORECASE
            )

            desc_header = "## Description"
            analysis_header = "## Analysis Process"
            result_header = "## Diagnosis Result"
        else:
            # Chinese section patterns
            desc_match = re.search(
                r'##\s*图像描述\s*\n([\s\S]*?)(?=##\s*分析过程|$)',
                generated_text
            )
            analysis_match = re.search(
                r'##\s*分析过程\s*\n([\s\S]*?)(?=##\s*诊断结果|$)',
                generated_text
            )
            result_match = re.search(
                r'##\s*诊断结果\s*\n([\s\S]*?)$',
                generated_text
            )

            desc_header = "## 图像描述"
            analysis_header = "## 分析过程"
            result_header = "## 诊断结果"

        # Extract description
        if desc_match:
            description = desc_match.group(1).strip()
            print("Successfully parsed description")
        else:
            print("Description parsing failed")
            description = ""

        # Extract analysis process
        if analysis_match:
            analysis_process = analysis_match.group(1).strip()
            print("Successfully parsed analysis process")
        else:
            print("Analysis process parsing failed, trying other methods")
            # Fallback: take everything before the diagnosis-result header.
            result_pos = generated_text.find(result_header)
            # BUGFIX: was `result_pos > 0`, which missed the header at
            # position 0 (str.find returns 0, not a truthy index, there).
            if result_pos != -1:
                # Get content before diagnosis result
                analysis_process = generated_text[:result_pos].strip()
                # Remove possible headers
                for header in [desc_header, analysis_header]:
                    header_escaped = re.escape(header)
                    analysis_process = re.sub(f'{header_escaped}\\s*\\n?', '', analysis_process).strip()
            else:
                # If no format at all, try to get first half
                mid_point = len(generated_text) // 2
                analysis_process = generated_text[:mid_point].strip()

        # If still empty, use original content (final fallback)
        if not analysis_process and raw_thinking:
            print("Using original raw_thinking as fallback")
            analysis_process = raw_thinking

        # Extract diagnosis result
        if result_match:
            diagnosis_result = result_match.group(1).strip()
            print("Successfully parsed diagnosis result")
        else:
            print("Diagnosis result parsing failed, trying other methods")
            # Fallback: take everything from the diagnosis-result header on.
            result_pos = generated_text.find(result_header)
            # BUGFIX: was `result_pos > 0`; a header at position 0 (output
            # starting with "## Diagnosis Result") fell through to the
            # arbitrary mid-point split below.
            if result_pos != -1:
                diagnosis_result = generated_text[result_pos:].strip()
                # Remove possible header
                result_header_escaped = re.escape(result_header)
                diagnosis_result = re.sub(f'^{result_header_escaped}\\s*\\n?', '', diagnosis_result).strip()
            else:
                # If no format at all, get second half
                mid_point = len(generated_text) // 2
                diagnosis_result = generated_text[mid_point:].strip()

        # If still empty, use original content (final fallback)
        if not diagnosis_result:
            print("Using original raw_answer as fallback")
            diagnosis_result = raw_answer

        return {
            "description": description,
            "analysis_process": analysis_process,
            "diagnosis_result": diagnosis_result
        }
355
+
356
+
357
# Global DeepSeek service instance (lazy loading)
_deepseek_service: Optional[DeepSeekService] = None


async def get_deepseek_service(api_key: Optional[str] = None) -> Optional[DeepSeekService]:
    """
    Return the process-wide DeepSeekService singleton, creating it lazily.

    Parameters:
        api_key: Optional API key to use

    Returns:
        The DeepSeekService instance (possibly marked as not loaded when API
        initialization failed gracefully), or None if construction/loading
        raised an exception.
    """
    global _deepseek_service

    # Fast path: the singleton already exists.
    if _deepseek_service is not None:
        return _deepseek_service

    try:
        # Assign before load() so repeated calls reuse the same instance,
        # mirroring the original lazy-init behavior.
        _deepseek_service = DeepSeekService(api_key=api_key)
        await _deepseek_service.load()
    except Exception as e:
        print(f"DeepSeek service initialization failed: {e}")
        return None

    if not _deepseek_service.is_loaded:
        # Instance exists but API is unavailable; callers fall back to raw output.
        print("DeepSeek API service initialization failed, will use fallback mode")

    return _deepseek_service
inference/.ipynb_checkpoints/demo-checkpoint.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import torch
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info
from PIL import Image

# === Configuration ===
MODEL_PATH = "../checkpoint"
IMAGE_PATH = "test_image.jpg"  # Please replace with your actual image path
PROMPT = "You are a professional AI dermatology assistant. Please analyze this skin image and provide a diagnosis."

def main():
    """Run a single-image, single-turn diagnosis demo against the local checkpoint."""
    print(f"Loading model from {MODEL_PATH}...")

    # 1. Load model and processor; any failure here is fatal for the demo.
    try:
        model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            MODEL_PATH,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            trust_remote_code=True
        )
        processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
    except Exception as e:
        print(f"Error loading model: {e}")
        return

    # 2. Check the input image exists before doing any heavy work.
    if not os.path.exists(IMAGE_PATH):
        print(f"Warning: Image not found at '{IMAGE_PATH}'. Please edit IMAGE_PATH in demo.py")
        return

    # 3. Build the single-turn multimodal message (image first, then prompt).
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": IMAGE_PATH},
                {"type": "text", "text": PROMPT},
            ],
        }
    ]

    print("Processing...")
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)

    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    ).to(model.device)

    # 4. Generate. Fix: do_sample=True is required for temperature/top_p to
    #    take effect — without it transformers ignores both settings.
    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=1024,
            do_sample=True,
            temperature=0.7,
            top_p=0.9
        )

    # 5. Decode only the newly generated tokens. Fix: the original decoded the
    #    whole sequence, which echoed the prompt back into the printed result
    #    (this matches the trimming done in model_utils.SkinGPTModel).
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )

    print("\n=== Diagnosis Result ===")
    print(output_text[0])
    print("========================")

if __name__ == "__main__":
    main()
inference/.ipynb_checkpoints/inference-checkpoint.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
import argparse
import os

from model_utils import SkinGPTModel


def main():
    """Command-line entry point: run one diagnosis on a single image."""
    parser = argparse.ArgumentParser(description="SkinGPT-R1 Single Inference")
    parser.add_argument("--image", type=str, required=True, help="Path to the image")
    parser.add_argument("--model_path", type=str, default="../checkpoint")
    parser.add_argument("--prompt", type=str, default="Please analyze this skin image and provide a diagnosis.")
    args = parser.parse_args()

    if not os.path.exists(args.image):
        print(f"Error: Image not found at {args.image}")
        return

    # 1. Load the model via the shared helper so the transformers loading
    #    boilerplate lives in exactly one place (model_utils).
    bot = SkinGPTModel(args.model_path)

    # 2. Build a single-turn multimodal message: the image part comes first,
    #    then the user prompt prefixed with the system instruction.
    system_prompt = "You are a professional AI dermatology assistant."
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": args.image},
                {"type": "text", "text": f"{system_prompt}\n\n{args.prompt}"}
            ]
        }
    ]

    # 3. Run inference and print the result.
    print(f"\nAnalyzing {args.image}...")
    result = bot.generate_response(messages)

    print("-" * 40)
    print("Result:")
    print(result)
    print("-" * 40)

if __name__ == "__main__":
    main()
inference/.ipynb_checkpoints/model_utils-checkpoint.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# model_utils.py
import torch
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer
from qwen_vl_utils import process_vision_info
from PIL import Image
import os
from threading import Thread

class SkinGPTModel:
    """Wrapper around a fine-tuned Qwen2.5-VL checkpoint for skin diagnosis.

    Handles device selection, processor configuration, and both blocking and
    streaming multimodal generation.
    """

    def __init__(self, model_path, device=None):
        # device: explicit override ("cuda"/"mps"/"cpu"); auto-detected when None.
        self.model_path = model_path
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Loading model from {model_path} on {self.device}...")

        # bfloat16 + flash-attention only off-CPU/on-CUDA; CPU gets float32.
        # device_map="auto" is skipped on MPS (the model is moved manually below).
        self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            model_path,
            torch_dtype=torch.bfloat16 if self.device != "cpu" else torch.float32,
            attn_implementation="flash_attention_2" if self.device == "cuda" else None,
            device_map="auto" if self.device != "mps" else None,
            trust_remote_code=True
        )

        if self.device == "mps":
            self.model = self.model.to(self.device)

        # min/max_pixels bound the per-image vision token budget.
        self.processor = AutoProcessor.from_pretrained(
            model_path,
            trust_remote_code=True,
            min_pixels=256*28*28,
            max_pixels=1280*28*28
        )
        print("Model loaded successfully.")

    def generate_response(self, messages, max_new_tokens=1024, temperature=0.7):
        """
        Run blocking generation over a multi-turn message history and return
        the decoded assistant reply.

        messages format:
        [
            {'role': 'user', 'content': [{'type': 'image', 'image': 'path...'}, {'type': 'text', 'text': '...'}]},
            {'role': 'assistant', 'content': [{'type': 'text', 'text': '...'}]}
        ]
        """
        # Render the chat template into a single prompt string.
        text = self.processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        # Extract the image/video inputs referenced by the messages.
        image_inputs, video_inputs = process_vision_info(messages)

        inputs = self.processor(
            text=[text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt",
        ).to(self.model.device)

        with torch.no_grad():
            generated_ids = self.model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=0.9,
                do_sample=True
            )

        # Strip the prompt tokens so only the new completion is decoded.
        generated_ids_trimmed = [
            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        output_text = self.processor.batch_decode(
            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )

        return output_text[0]

    def generate_response_stream(self, messages, max_new_tokens=2048, temperature=0.7):
        """
        Stream a response for the given message history.

        Returns a generator that yields decoded text chunks as they are
        produced; generation runs on a background thread and is drained
        through a TextIteratorStreamer.
        """
        # Same preprocessing as the blocking path.
        text = self.processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        image_inputs, video_inputs = process_vision_info(messages)

        inputs = self.processor(
            text=[text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt",
        ).to(self.model.device)

        # skip_prompt=True so the echoed input is not streamed back.
        streamer = TextIteratorStreamer(
            self.processor.tokenizer,
            skip_prompt=True,
            skip_special_tokens=True
        )

        # Generation parameters, wired to the streamer.
        generation_kwargs = {
            **inputs,
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
            "top_p": 0.9,
            "do_sample": True,
            "streamer": streamer,
        }

        # model.generate blocks, so it runs in a worker thread while this
        # generator yields chunks from the streamer.
        thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
        thread.start()

        # Yield each decoded chunk as soon as it is available.
        for text_chunk in streamer:
            yield text_chunk

        thread.join()
inference/__pycache__/app.cpython-311.pyc ADDED
Binary file (17.8 kB). View file
 
inference/__pycache__/deepseek_service.cpython-311.pyc ADDED
Binary file (18.3 kB). View file
 
inference/__pycache__/model_utils.cpython-311.pyc ADDED
Binary file (5.39 kB). View file
 
inference/app.py ADDED
@@ -0,0 +1,423 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import uvicorn
3
+ import os
4
+ import shutil
5
+ import uuid
6
+ import json
7
+ import re
8
+ import asyncio
9
+ from typing import Optional
10
+ from io import BytesIO
11
+ from contextlib import asynccontextmanager
12
+ from PIL import Image
13
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
14
+ from fastapi.middleware.cors import CORSMiddleware
15
+ from fastapi.responses import StreamingResponse
16
+ from fastapi.concurrency import run_in_threadpool
17
+ from model_utils import SkinGPTModel
18
+ from deepseek_service import get_deepseek_service, DeepSeekService
19
+
20
+ # === Configuration ===
21
+ MODEL_PATH = "../checkpoint"
22
+ TEMP_DIR = "./temp_uploads"
23
+ os.makedirs(TEMP_DIR, exist_ok=True)
24
+
25
# DeepSeek API Key.
# SECURITY: read only from the environment — never commit a real key as a
# fallback default. The key previously hardcoded here must be treated as
# leaked and revoked. When unset, get_deepseek_service() degrades gracefully
# and the app falls back to returning raw (unrefined) model output.
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
27
+
28
+ # Global DeepSeek service instance
29
+ deepseek_service: Optional[DeepSeekService] = None
30
+
31
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: initialize the DeepSeek service on startup."""
    # Startup: bring up the DeepSeek refinement service (best effort).
    await init_deepseek()
    yield
    # Shutdown: nothing to tear down beyond logging.
    print("\nShutting down service...")

app = FastAPI(
    title="SkinGPT-R1 皮肤诊断系统",
    description="智能皮肤诊断助手",
    version="1.0.0",
    lifespan=lifespan
)

# CORS configuration - allow the frontend dev servers to call this API.
# NOTE(review): "*" alongside allow_credentials=True is permissive — confirm
# this is intended before deploying publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:3000", "http://localhost:5173", "http://127.0.0.1:5173", "*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# In-memory per-session state (process-local, not persisted):
#   chat_states:    session id -> Qwen-format message history
#   pending_images: session id -> path of an uploaded image not yet sent to the LLM
chat_states = {}
pending_images = {}
60
+
61
def parse_diagnosis_result(raw_text: str) -> dict:
    """
    Parse the <think>/<answer> tags out of a raw diagnosis string.

    Tolerates truncated generations (unclosed tags) and falls back to the
    cleaned raw text when no <answer> section can be located.

    Parameters:
        raw_text: Raw diagnosis text produced by the model.

    Returns:
        dict with keys:
            thinking: content of the <think> section, or None if absent
            answer:   content of the <answer> section (or a cleaned fallback)
            raw:      the unmodified input text
    """
    # Fix: the redundant function-local `import re` was removed — `re` is
    # already imported at module level.

    # Try fully-closed tags first.
    think_match = re.search(r'<think>([\s\S]*?)</think>', raw_text)
    answer_match = re.search(r'<answer>([\s\S]*?)</answer>', raw_text)

    thinking = None
    answer = None

    # <think> section: accept an unclosed tag (truncated output).
    if think_match:
        thinking = think_match.group(1).strip()
    else:
        unclosed_think = re.search(r'<think>([\s\S]*?)(?=<answer>|$)', raw_text)
        if unclosed_think:
            thinking = unclosed_think.group(1).strip()

    # <answer> section: likewise tolerate a missing closing tag.
    if answer_match:
        answer = answer_match.group(1).strip()
    else:
        unclosed_answer = re.search(r'<answer>([\s\S]*?)$', raw_text)
        if unclosed_answer:
            answer = unclosed_answer.group(1).strip()

    # No <answer> found at all: strip tag blocks from the raw text and use
    # whatever remains; if nothing remains, fall back to the raw text itself.
    if not answer:
        cleaned = re.sub(r'<think>[\s\S]*?</think>', '', raw_text)
        cleaned = re.sub(r'<think>[\s\S]*', '', cleaned)  # drop an unclosed <think>
        cleaned = re.sub(r'</?answer>', '', cleaned)      # drop stray answer tags
        cleaned = cleaned.strip()
        answer = cleaned if cleaned else raw_text

    # Remove any tags that leaked through into either section.
    if answer:
        answer = re.sub(r'</?think>|</?answer>', '', answer).strip()
    if thinking:
        thinking = re.sub(r'</?think>|</?answer>', '', thinking).strip()

    # Honour an explicit "Final Answer:" marker inside the answer.
    if answer:
        final_answer_match = re.search(r'Final Answer:\s*([\s\S]*)', answer, re.IGNORECASE)
        if final_answer_match:
            answer = final_answer_match.group(1).strip()

    return {
        "thinking": thinking if thinking else None,
        "answer": answer,
        "raw": raw_text
    }
124
+
125
+ print("Initializing Model Service...")
126
+ # 全局加载模型
127
+ gpt_model = SkinGPTModel(MODEL_PATH)
128
+ print("Service Ready.")
129
+
130
# Initialize the DeepSeek refinement service (async; invoked from lifespan).
async def init_deepseek():
    """Create the global DeepSeek service instance; tolerate failure."""
    global deepseek_service
    print("\nInitializing DeepSeek service...")
    deepseek_service = await get_deepseek_service(api_key=DEEPSEEK_API_KEY)
    # The service may exist but be unusable (e.g. missing key) — check both.
    ready = bool(deepseek_service) and deepseek_service.is_loaded
    if ready:
        print("DeepSeek service is ready!")
    else:
        print("DeepSeek service not available, will return raw results")
139
+
140
@app.post("/v1/upload/{state_id}")
async def upload_file(state_id: str, file: UploadFile = File(...), survey: str = Form(None)):
    """
    Accept an image upload for a chat session.

    The image is saved to the local temp directory and recorded as "pending"
    for state_id; the next /v1/predict call attaches it to the conversation.
    """
    try:
        # 1. Derive a safe file extension. The client fully controls
        #    `filename`, so the raw extension must be sanitized before it is
        #    embedded in a filesystem path (the original code let arbitrary
        #    characters through).
        raw_ext = file.filename.rsplit(".", 1)[-1] if file.filename and "." in file.filename else "jpg"
        file_extension = re.sub(r"[^A-Za-z0-9]", "", raw_ext) or "jpg"
        unique_name = f"{state_id}_{uuid.uuid4().hex}.{file_extension}"
        file_path = os.path.join(TEMP_DIR, unique_name)

        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # 2. Remember the image so the next predict call can consume it.
        #    Single-image mode: a new upload replaces any previous pending one.
        pending_images[state_id] = file_path

        # 3. Initialize conversation state for brand-new sessions.
        if state_id not in chat_states:
            chat_states[state_id] = []

        return {"message": "Image uploaded successfully", "path": file_path}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}")
167
+
168
@app.post("/v1/predict/{state_id}")
async def v1_predict(request: Request, state_id: str):
    """
    Run one chat turn: consume any pending uploaded image, append the user
    message to the session history, and generate the assistant reply.
    """
    try:
        data = await request.json()
    except Exception:
        # Fix: the original bare `except:` also swallowed BaseException
        # subclasses such as asyncio.CancelledError.
        raise HTTPException(status_code=400, detail="Invalid JSON")

    user_message = data.get("message", "")
    if not user_message:
        raise HTTPException(status_code=400, detail="Missing 'message' field")

    # Fetch or start the session history.
    history = chat_states.get(state_id, [])

    # Content parts for this turn (image first, then text — Qwen convention).
    current_content = []

    # 1. Attach a pending image, if one was uploaded since the last turn.
    if state_id in pending_images:
        img_path = pending_images.pop(state_id)  # consume exactly once
        current_content.append({"type": "image", "image": img_path})

    # Prepend the system prompt on the very first turn only.
    if not history:
        system_prompt = "You are a professional AI dermatology assistant. "
        user_message = f"{system_prompt}\n\n{user_message}"

    # 2. Add the text part.
    current_content.append({"type": "text", "text": user_message})

    # 3. Record the user turn.
    history.append({"role": "user", "content": current_content})
    chat_states[state_id] = history

    # 4. Run inference in the threadpool so the event loop is not blocked.
    try:
        response_text = await run_in_threadpool(
            gpt_model.generate_response,
            messages=history
        )
    except Exception as e:
        # Roll back the failed user turn so the session can retry cleanly.
        chat_states[state_id].pop()
        raise HTTPException(status_code=500, detail=f"Inference error: {str(e)}")

    # 5. Record the assistant turn.
    history.append({"role": "assistant", "content": [{"type": "text", "text": response_text}]})
    chat_states[state_id] = history

    return {"message": response_text}
222
+
223
@app.post("/v1/reset/{state_id}")
async def reset_chat(state_id: str):
    """Drop all state for a session: chat history plus any pending image file."""
    if state_id in chat_states:
        del chat_states[state_id]
    if state_id in pending_images:
        # Best-effort removal of the temp file. Fix: only filesystem errors
        # are swallowed — the original bare `except:` hid every exception type.
        try:
            os.remove(pending_images[state_id])
        except OSError:
            pass
        del pending_images[state_id]
    return {"message": "Chat history reset"}
236
+
237
@app.get("/")
async def root():
    """Service metadata returned at the API root."""
    info = {
        "name": "SkinGPT-R1 皮肤诊断系统",
        "version": "1.0.0",
        "status": "running",
        "description": "智能皮肤诊断助手",
    }
    return info
246
+
247
@app.get("/health")
async def health_check():
    """Liveness probe; the model is loaded at import time, so report healthy."""
    return {"status": "healthy", "model_loaded": True}
254
+
255
@app.post("/diagnose/stream")
async def diagnose_stream(
    image: Optional[UploadFile] = File(None),
    text: str = Form(...),
    language: str = Form("zh"),
):
    """
    SSE streaming diagnosis endpoint (used by the frontend).

    Accepts an optional image plus a text prompt, streams raw model deltas as
    they are generated, then emits one "final" event whose payload has been
    refined by the DeepSeek service when available.
    """
    from queue import Queue, Empty
    from threading import Thread

    language = language if language in ("zh", "en") else "zh"

    # Decode the uploaded image (if any) before streaming begins.
    pil_image = None
    temp_image_path = None

    if image:
        contents = await image.read()
        pil_image = Image.open(BytesIO(contents)).convert("RGB")

    # Queue bridges the generation worker thread and the async SSE generator.
    result_queue = Queue()
    # Shared slots: full transcript, parsed result, temp image path.
    generation_result = {"full_response": [], "parsed": None, "temp_image_path": None}

    def run_generation():
        """Worker thread: run streaming generation, pushing deltas to the queue."""
        full_response = []

        try:
            # Build the single-turn multimodal message.
            messages = []
            current_content = []

            # Language-matched system prompt.
            system_prompt = "You are a professional AI dermatology assistant." if language == "en" else "你是一个专业的AI皮肤科助手。"

            # The model utils expect an image *path*, so persist to disk.
            if pil_image:
                generation_result["temp_image_path"] = os.path.join(TEMP_DIR, f"temp_{uuid.uuid4().hex}.jpg")
                pil_image.save(generation_result["temp_image_path"])
                current_content.append({"type": "image", "image": generation_result["temp_image_path"]})

            # Add the text part.
            prompt = f"{system_prompt}\n\n{text}"
            current_content.append({"type": "text", "text": prompt})
            messages.append({"role": "user", "content": current_content})

            # Streaming generation — forward every chunk immediately.
            for chunk in gpt_model.generate_response_stream(
                messages=messages,
                max_new_tokens=2048,
                temperature=0.7
            ):
                full_response.append(chunk)
                result_queue.put(("delta", chunk))

            # Parse the finished transcript into thinking/answer sections.
            response_text = "".join(full_response)
            parsed = parse_diagnosis_result(response_text)
            generation_result["full_response"] = full_response
            generation_result["parsed"] = parsed

            # Signal completion to the consumer.
            result_queue.put(("generation_done", None))

        except Exception as e:
            result_queue.put(("error", str(e)))

    async def event_generator():
        """Drain the queue and emit SSE events without blocking the event loop."""
        # Start generation on a background thread (non-blocking).
        gen_thread = Thread(target=run_generation)
        gen_thread.start()

        # Fix: get_running_loop() is the correct, non-deprecated call inside a
        # coroutine (the original used asyncio.get_event_loop()).
        loop = asyncio.get_running_loop()

        # Relay queued messages as SSE events.
        while True:
            try:
                # Blocking queue read runs in the default executor so the
                # event loop stays responsive.
                msg_type, data = await loop.run_in_executor(
                    None,
                    lambda: result_queue.get(timeout=0.1)
                )

                if msg_type == "generation_done":
                    # Streaming finished; fall through to the final payload.
                    break
                elif msg_type == "delta":
                    yield_chunk = json.dumps({"type": "delta", "text": data}, ensure_ascii=False)
                    yield f"data: {yield_chunk}\n\n"
                elif msg_type == "error":
                    yield f"data: {json.dumps({'type': 'error', 'message': data}, ensure_ascii=False)}\n\n"
                    gen_thread.join()
                    return

            except Empty:
                # Queue momentarily empty — yield control and retry.
                await asyncio.sleep(0.01)
                continue

        gen_thread.join()

        # Fetch the parsed transcript.
        parsed = generation_result["parsed"]
        if not parsed:
            yield f"data: {json.dumps({'type': 'error', 'message': 'Failed to parse response'}, ensure_ascii=False)}\n\n"
            return

        raw_thinking = parsed["thinking"]
        raw_answer = parsed["answer"]

        # Optionally refine the raw output through DeepSeek.
        refined_by_deepseek = False
        description = None
        thinking = raw_thinking
        answer = raw_answer

        if deepseek_service and deepseek_service.is_loaded:
            try:
                print(f"Calling DeepSeek to refine diagnosis (language={language})...")
                refined = await deepseek_service.refine_diagnosis(
                    raw_answer=raw_answer,
                    raw_thinking=raw_thinking,
                    language=language,
                )
                if refined["success"]:
                    description = refined["description"]
                    thinking = refined["analysis_process"]
                    answer = refined["diagnosis_result"]
                    refined_by_deepseek = True
                    print(f"DeepSeek refinement completed successfully")
            except Exception as e:
                print(f"DeepSeek refinement failed, using original: {e}")
        else:
            print("DeepSeek service not available, using raw results")

        success_msg = "Diagnosis completed" if language == "en" else "诊断完成"

        # Payload layout mirrors the reference project's schema.
        final_payload = {
            "description": description,                 # image description (from thinking)
            "thinking": thinking,                       # analysis process (refined)
            "answer": answer,                           # diagnosis result (refined)
            "raw": parsed["raw"],                       # untouched model output
            "refined_by_deepseek": refined_by_deepseek, # whether DeepSeek refined it
            "success": True,
            "message": success_msg
        }
        yield_final = json.dumps({"type": "final", "result": final_payload}, ensure_ascii=False)
        yield f"data: {yield_final}\n\n"

        # Clean up the temporary image. Fix: only filesystem errors are
        # swallowed — the original bare `except:` hid every exception type.
        temp_path = generation_result.get("temp_image_path")
        if temp_path and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass

    return StreamingResponse(event_generator(), media_type="text/event-stream")
421
+
422
+ if __name__ == '__main__':
423
+ uvicorn.run("app:app", host="0.0.0.0", port=5900, reload=False)
inference/chat.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# chat.py
import argparse
import os

from model_utils import SkinGPTModel


def main():
    """Interactive multi-turn chat loop seeded with a single skin image."""
    parser = argparse.ArgumentParser(description="SkinGPT-R1 Multi-turn Chat")
    parser.add_argument("--model_path", type=str, default="../checkpoint")
    parser.add_argument("--image", type=str, required=True, help="Path to initial image")
    args = parser.parse_args()

    # Load the model first (matches the original startup order).
    bot = SkinGPTModel(args.model_path)

    # System instruction prepended to the opening user turn.
    system_prompt = "You are a professional AI dermatology assistant. Analyze the skin condition carefully."

    if not os.path.exists(args.image):
        print(f"Error: Image {args.image} not found.")
        return

    # Seed the history with the image plus the opening request.
    history = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": args.image},
                {"type": "text", "text": f"{system_prompt}\n\nPlease analyze this image."}
            ]
        }
    ]

    print("\n=== SkinGPT-R1 Chat (Type 'exit' to quit) ===")
    print(f"Image loaded: {args.image}")

    # First diagnosis turn.
    print("\nModel is thinking...", end="", flush=True)
    reply = bot.generate_response(history)
    print(f"\rAssistant: {reply}\n")
    history.append({"role": "assistant", "content": [{"type": "text", "text": reply}]})

    # Interactive loop: each iteration appends one user and one assistant turn.
    while True:
        try:
            user_input = input("User: ")
            if user_input.lower() in ["exit", "quit"]:
                break
            if not user_input.strip():
                continue

            history.append({"role": "user", "content": [{"type": "text", "text": user_input}]})

            print("Model is thinking...", end="", flush=True)
            reply = bot.generate_response(history)
            print(f"\rAssistant: {reply}\n")
            history.append({"role": "assistant", "content": [{"type": "text", "text": reply}]})

        except KeyboardInterrupt:
            break


if __name__ == "__main__":
    main()
inference/deepseek_service.py ADDED
@@ -0,0 +1,384 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DeepSeek API Service
3
+ Used to optimize and organize SkinGPT model output results
4
+ """
5
+
6
+ import os
7
+ import re
8
+ from typing import Optional
9
+ from openai import AsyncOpenAI
10
+
11
+
12
+ class DeepSeekService:
13
+ """DeepSeek API Service Class"""
14
+
15
    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize DeepSeek service.

        Parameters:
            api_key: DeepSeek API key; falls back to the DEEPSEEK_API_KEY
                environment variable when not provided.
        """
        # Explicit key argument wins; otherwise consult the environment.
        self.api_key = api_key or os.environ.get("DEEPSEEK_API_KEY")
        self.base_url = "https://api.deepseek.com"
        self.model = "deepseek-chat"  # Using deepseek-chat model

        # The client is created lazily in load(); is_loaded reflects whether
        # that initialization succeeded.
        self.client = None
        self.is_loaded = False

        print(f"DeepSeek API service initializing...")
        print(f"API Base URL: {self.base_url}")
31
+
32
+ async def load(self):
33
+ """Initialize DeepSeek API client"""
34
+ try:
35
+ if not self.api_key:
36
+ print("DeepSeek API key not provided")
37
+ self.is_loaded = False
38
+ return
39
+
40
+ # Initialize OpenAI compatible client
41
+ self.client = AsyncOpenAI(
42
+ api_key=self.api_key,
43
+ base_url=self.base_url
44
+ )
45
+
46
+ self.is_loaded = True
47
+ print("DeepSeek API service is ready!")
48
+
49
+ except Exception as e:
50
+ print(f"DeepSeek API service initialization failed: {e}")
51
+ self.is_loaded = False
52
+
53
    async def refine_diagnosis(
        self,
        raw_answer: str,
        raw_thinking: Optional[str] = None,
        language: str = "zh"
    ) -> dict:
        """
        Use DeepSeek API to optimize and organize diagnosis results.

        Parameters:
            raw_answer: Original diagnosis result
            raw_thinking: AI thinking process (None when the model produced
                no <think> section)
            language: Language option ("zh" or "en")

        Returns:
            dict with keys: success, description, analysis_process,
            diagnosis_result, original_diagnosis, plus raw_refined on success
            or error on failure. On any failure the original raw_answer /
            raw_thinking are passed through unchanged so callers can degrade
            gracefully.
        """

        # Fallback path: API never initialized — return the raw inputs.
        if not self.is_loaded or self.client is None:
            error_msg = "API not initialized, cannot generate analysis" if language == "en" else "API未初始化,无法生成分析过程"
            print("DeepSeek API not initialized, returning original result")
            return {
                "success": False,
                "description": "",
                "analysis_process": raw_thinking or error_msg,
                "diagnosis_result": raw_answer,
                "original_diagnosis": raw_answer,
                "error": "DeepSeek API not initialized"
            }

        try:
            # Build prompt
            prompt = self._build_refine_prompt(raw_answer, raw_thinking, language)

            # Select system prompt based on language
            if language == "en":
                system_content = "You are a professional medical text editor. Your task is to polish and organize medical diagnostic text to make it flow smoothly while preserving the original meaning. Output ONLY the formatted result. Do NOT add any explanations, comments, or thoughts. Just follow the format exactly."
            else:
                system_content = "你是医学文本整理专家,按照用户要求将用户输入的文本整理成用户想要的格式,不要改写或总结。"

            # Call DeepSeek API. Low temperature keeps the rewrite close to
            # the source text — this is reformatting, not free generation.
            response = await self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": system_content},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.1,
                max_tokens=2048,
                top_p=0.8,
            )

            # Extract generated text
            generated_text = response.choices[0].message.content

            # Parse output into description / analysis / diagnosis sections.
            parsed = self._parse_refined_output(generated_text, raw_answer, raw_thinking, language)

            return {
                "success": True,
                "description": parsed["description"],
                "analysis_process": parsed["analysis_process"],
                "diagnosis_result": parsed["diagnosis_result"],
                "original_diagnosis": raw_answer,
                "raw_refined": generated_text
            }

        except Exception as e:
            # Network/API failure: degrade to the unrefined text.
            print(f"DeepSeek API call failed: {e}")
            error_msg = "API call failed, cannot generate analysis" if language == "en" else "API调用失败,无法生成分析过程"
            return {
                "success": False,
                "description": "",
                "analysis_process": raw_thinking or error_msg,
                "diagnosis_result": raw_answer,
                "original_diagnosis": raw_answer,
                "error": str(e)
            }
131
+
132
+ def _build_refine_prompt(self, raw_answer: str, raw_thinking: Optional[str] = None, language: str = "zh") -> str:
133
+ """
134
+ Build optimization prompt
135
+
136
+ Parameters:
137
+ raw_answer: Original diagnosis result
138
+ raw_thinking: AI thinking process
139
+ language: Language option, "zh" for Chinese, "en" for English
140
+
141
+ Returns:
142
+ Built prompt
143
+ """
144
+ if language == "en":
145
+ # English prompt - organize and polish while preserving meaning
146
+ thinking_text = raw_thinking if raw_thinking else "No analysis process available."
147
+ prompt = f"""You are a text organization expert. There are two texts that need to be organized. Text 1 is the thinking process of the SkinGPT model, and Text 2 is the diagnosis result given by SkinGPT.
148
+
149
+ 【Requirements】
150
+ - Preserve the original tone and expression style
151
+ - Text 1 contains the thinking process, Text 2 contains the diagnosis result
152
+ - Extract the image observation part from the thinking process as Description. This should include all factual observations about what was seen in the image, not just a brief summary.
153
+ - For Diagnostic Reasoning: refine and condense the remaining thinking content. Remove redundancies, self-doubt, circular reasoning, and unnecessary repetition. Keep it concise and not too long. Keep the logical chain clear and enhance readability. IMPORTANT: DO NOT include any image description or visual observations in Diagnostic Reasoning. Only include reasoning, analysis, and diagnostic thought process.
154
+ - If [Text 1] content is NOT: No analysis process available. Then organize [Text 1] content accordingly, DO NOT confuse [Text 1] and [Text 2]
155
+ - If [Text 1] content IS: No analysis process available. Then extract the analysis process and description from [Text 2]
156
+ - DO NOT infer or add new medical information, DO NOT output any meta-commentary
157
+ - You may adjust unreasonable statements or remove redundant content to improve clarity
158
+
159
+ [Text 1]
160
+ {thinking_text}
161
+
162
+ [Text 2]
163
+ {raw_answer}
164
+
165
+ 【Output】Only output three sections, do not output anything else:
166
+ ## Description
167
+ (Extract all image observation content from the thinking process - include all factual descriptions of what was seen)
168
+
169
+ ## Analysis Process
170
+ (Refined and condensed diagnostic reasoning: remove self-doubt, circular logic, and redundancies. Keep it concise and not too long. Keep logical flow clear. Do NOT include image observations)
171
+
172
+ ## Diagnosis Result
173
+ (The organized diagnosis result from Text 2)
174
+
175
+ 【Example】:
176
+ ## Description
177
+ The image shows red inflamed patches on the skin with pustules and darker colored spots. The lesions appear as papules and pustules distributed across the affected area, with some showing signs of inflammation and possible post-inflammatory hyperpigmentation.
178
+
179
+ ## Analysis Process
180
+ These findings are consistent with acne vulgaris, commonly seen during adolescence. The user's age aligns with typical onset for this condition. Treatment recommendations: over-the-counter medications such as benzoyl peroxide or topical antibiotics, avoiding picking at the skin, and consulting a dermatologist if severe. The goal is to control inflammation and prevent scarring.
181
+
182
+ ## Diagnosis Result
183
+ Possible diagnosis: Acne (pimples) Explanation: Acne is a common skin condition, especially during adolescence, when hormonal changes cause overactive sebaceous glands, which can easily clog pores and form acne. Pathological care recommendations: 1. Keep face clean, wash face 2-3 times daily, use gentle cleansing products. 2. Avoid squeezing acne with hands to prevent worsening inflammation or leaving scars. 3. Avoid using irritating cosmetics and skincare products. 4. Can use topical medications containing salicylic acid, benzoyl peroxide, etc. 5. If necessary, can use oral antibiotics or other treatment methods under doctor's guidance. Precautions: 1. Avoid rubbing or damaging the affected area to prevent infection. 2. Eat less oily and spicy foods, eat more vegetables and fruits. 3. Maintain good rest habits, avoid staying up late. 4. If acne symptoms persist without improvement or show signs of worsening, seek medical attention promptly.
184
+ """
185
+ else:
186
+ # Chinese prompt - translate to Simplified Chinese AND organize/polish
187
+ thinking_text = raw_thinking if raw_thinking else "No analysis process available."
188
+ prompt = f"""你是一个文本整理专家。有两段文本需要整理,文本1是SkinGPT模型的思考过程的文本,文本2是SkinGPT给出的诊断结果的文本。
189
+
190
+ 【要求】
191
+ - 保留原文的语气和表达方式
192
+ - 文本1是思考过程,文本2是诊断结果
193
+ - 从思考过程中提取图像观察部分作为图像描述。需要包含所有关于图片中观察到的事实内容,不要简化或缩短。
194
+ - 对于分析过程:提炼并精简剩余的思考内容,去除冗余、自我怀疑、兜圈子的内容。保持简洁,不要太长。保持逻辑链条清晰,增强可读性。重要:分析过程中不���包含任何图像描述或视觉观察内容,只包含推理、分析和诊断思考过程。
195
+ - 如果【文本1】内容不是:No analysis process available.那么按要求整理【文本1】的内容,不要混淆【文本1】和【文本2】。
196
+ - 如果【文本1】内容是:No analysis process available.那么从【文本2】提炼分析过程和描述。
197
+ - 【文本1】和【文本2】需要翻译成简体中文
198
+ - 禁止推断或添加新的医学信息,禁止输出任何元评论
199
+ - 可以调整不合理的语句或去除冗余内容以提高清晰度
200
+
201
+
202
+ 【文本1】
203
+ {thinking_text}
204
+
205
+ 【文本2】
206
+ {raw_answer}
207
+
208
+ 【输出】只输出三个部分,不要输出其他任何内容:
209
+ ## 图像描述
210
+ (从思考过程中提取所有图像观察内容,包含所有关于图片的事实描述)
211
+
212
+ ## 分析过程
213
+ (提炼并精简后的诊断推理:去除自我怀疑、兜圈逻辑和冗余内容。保持简洁,不要太长。保持逻辑流畅。不包含图像观察)
214
+
215
+ ## 诊断结果
216
+ (整理后的诊断结果)
217
+
218
+ 【样例】:
219
+ ## 图像描述
220
+ 图片显示皮肤上有红色发炎的斑块,伴有脓疱和颜色较深的斑点。病变表现为分布在受影响区域的丘疹和脓疱,部分显示出炎症迹象和可能的炎症后色素沉着。
221
+
222
+ ## 分析过程
223
+ 这些表现符合寻常痤疮的特征,青春期常见。用户的年龄与该病症的典型发病年龄相符。治疗建议:使用非处方药物如过氧化苯甲酰或外用抗生素,避免抠抓皮肤,病情严重时咨询皮肤科医生。目标是控制炎症并防止疤痕形成。
224
+
225
+ ## 诊断结果
226
+ 可能的诊断:痤疮(青春痘) 解释:痤疮是一种常见的皮肤病,特别是在青少年期间,由于激素水平的变化导致皮脂腺过度活跃,容易堵塞毛孔,形成痤疮。 病理护理建议:1.保持面部清洁,每天洗脸2-3次,使用温和的洁面产品。 2.避免用手挤压痤疮,以免加重炎症或留下疤痕。 3.避免使用刺激性的化妆品和护肤品。 4.可以使用含有水杨酸、苯氧醇等成分的外用药物治疗。 5.如有需要,可以在医生指导下使用抗生素口服药或其他治疗方法。 注意事项:1. 避免摩擦或损伤患处,以免引起感染。 2. 饮食上应少吃油腻、辛辣食物,多吃蔬菜水果。 3. 保持良好的作息习惯,避免熬夜。 4. 如果痤疮症状持续不见好转或有恶化的趋势,应及时就医。
227
+ """
228
+
229
+ return prompt
230
+
231
+ def _parse_refined_output(
232
+ self,
233
+ generated_text: str,
234
+ raw_answer: str,
235
+ raw_thinking: Optional[str] = None,
236
+ language: str = "zh"
237
+ ) -> dict:
238
+ """
239
+ Parse DeepSeek generated output
240
+
241
+ Parameters:
242
+ generated_text: DeepSeek generated text
243
+ raw_answer: Original diagnosis (as fallback)
244
+ raw_thinking: Original thinking process (as fallback)
245
+ language: Language option
246
+
247
+ Returns:
248
+ Dictionary containing description, analysis_process and diagnosis_result
249
+ """
250
+ description = ""
251
+ analysis_process = None
252
+ diagnosis_result = None
253
+
254
+ if language == "en":
255
+ # English patterns
256
+ desc_match = re.search(
257
+ r'##\s*Description\s*\n([\s\S]*?)(?=##\s*Analysis\s*Process|$)',
258
+ generated_text,
259
+ re.IGNORECASE
260
+ )
261
+ analysis_match = re.search(
262
+ r'##\s*Analysis\s*Process\s*\n([\s\S]*?)(?=##\s*Diagnosis\s*Result|$)',
263
+ generated_text,
264
+ re.IGNORECASE
265
+ )
266
+ result_match = re.search(
267
+ r'##\s*Diagnosis\s*Result\s*\n([\s\S]*?)$',
268
+ generated_text,
269
+ re.IGNORECASE
270
+ )
271
+
272
+ desc_header = "## Description"
273
+ analysis_header = "## Analysis Process"
274
+ result_header = "## Diagnosis Result"
275
+ else:
276
+ # Chinese patterns
277
+ desc_match = re.search(
278
+ r'##\s*图像描述\s*\n([\s\S]*?)(?=##\s*分析过程|$)',
279
+ generated_text
280
+ )
281
+ analysis_match = re.search(
282
+ r'##\s*分析过程\s*\n([\s\S]*?)(?=##\s*诊断结果|$)',
283
+ generated_text
284
+ )
285
+ result_match = re.search(
286
+ r'##\s*诊断结果\s*\n([\s\S]*?)$',
287
+ generated_text
288
+ )
289
+
290
+ desc_header = "## 图像描述"
291
+ analysis_header = "## 分析过程"
292
+ result_header = "## 诊断结果"
293
+
294
+ # Extract description
295
+ if desc_match:
296
+ description = desc_match.group(1).strip()
297
+ print(f"Successfully parsed description")
298
+ else:
299
+ print(f"Description parsing failed")
300
+ description = ""
301
+
302
+ # Extract analysis process
303
+ if analysis_match:
304
+ analysis_process = analysis_match.group(1).strip()
305
+ print(f"Successfully parsed analysis process")
306
+ else:
307
+ print(f"Analysis process parsing failed, trying other methods")
308
+ # Try to extract from generated text
309
+ result_pos = generated_text.find(result_header)
310
+ if result_pos > 0:
311
+ # Get content before diagnosis result
312
+ analysis_process = generated_text[:result_pos].strip()
313
+ # Remove possible headers
314
+ for header in [desc_header, analysis_header]:
315
+ header_escaped = re.escape(header)
316
+ analysis_process = re.sub(f'{header_escaped}\\s*\\n?', '', analysis_process).strip()
317
+ else:
318
+ # If no format at all, try to get first half
319
+ mid_point = len(generated_text) // 2
320
+ analysis_process = generated_text[:mid_point].strip()
321
+
322
+ # If still empty, use original content (final fallback)
323
+ if not analysis_process and raw_thinking:
324
+ print(f"Using original raw_thinking as fallback")
325
+ analysis_process = raw_thinking
326
+
327
+ # Extract diagnosis result
328
+ if result_match:
329
+ diagnosis_result = result_match.group(1).strip()
330
+ print(f"Successfully parsed diagnosis result")
331
+ else:
332
+ print(f"Diagnosis result parsing failed, trying other methods")
333
+ # Try to extract from generated text
334
+ result_pos = generated_text.find(result_header)
335
+ if result_pos > 0:
336
+ diagnosis_result = generated_text[result_pos:].strip()
337
+ # Remove possible header
338
+ result_header_escaped = re.escape(result_header)
339
+ diagnosis_result = re.sub(f'^{result_header_escaped}\\s*\\n?', '', diagnosis_result).strip()
340
+ else:
341
+ # If no format at all, get second half
342
+ mid_point = len(generated_text) // 2
343
+ diagnosis_result = generated_text[mid_point:].strip()
344
+
345
+ # If still empty, use original content (final fallback)
346
+ if not diagnosis_result:
347
+ print(f"Using original raw_answer as fallback")
348
+ diagnosis_result = raw_answer
349
+
350
+ return {
351
+ "description": description,
352
+ "analysis_process": analysis_process,
353
+ "diagnosis_result": diagnosis_result
354
+ }
355
+
356
+
357
# Module-level singleton holding the lazily created DeepSeek service.
_deepseek_service: Optional[DeepSeekService] = None


async def get_deepseek_service(api_key: Optional[str] = None) -> Optional[DeepSeekService]:
    """
    Return the shared DeepSeekService instance, creating it on first use.

    Parameters:
        api_key: Optional API key forwarded to the service constructor.

    Returns:
        The singleton DeepSeekService — possibly with ``is_loaded`` False,
        meaning callers should use fallback mode — or None when
        construction/loading raised.
    """
    global _deepseek_service

    # Fast path: a previous call already built the singleton.
    if _deepseek_service is not None:
        return _deepseek_service

    try:
        # Assign before load() so a partially initialized instance is kept,
        # mirroring the lazy-singleton contract used elsewhere in the app.
        _deepseek_service = DeepSeekService(api_key=api_key)
        await _deepseek_service.load()
    except Exception as exc:
        print(f"DeepSeek service initialization failed: {exc}")
        return None

    if not _deepseek_service.is_loaded:
        # Return the instance anyway so callers can detect fallback mode.
        print("DeepSeek API service initialization failed, will use fallback mode")
    return _deepseek_service
inference/demo.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
3
+ from qwen_vl_utils import process_vision_info
4
+ from PIL import Image
5
+
6
# === Configuration ===
# Directory holding the fine-tuned Qwen2.5-VL checkpoint (relative to this file).
MODEL_PATH = "../checkpoint"
# Input image for the demo run.
IMAGE_PATH = "test_image.jpg"  # Please replace with your actual image path
# Instruction sent to the model together with the image.
PROMPT = "You are a professional AI dermatology assistant. Please analyze this skin image and provide a diagnosis."
10
+
11
def main():
    """Load the SkinGPT checkpoint, run one image through it and print the diagnosis.

    Reads MODEL_PATH / IMAGE_PATH / PROMPT module constants; exits early with a
    message if the model cannot be loaded or the image file is missing.
    """
    import os

    print(f"Loading model from {MODEL_PATH}...")

    # 1. Load model and processor; bail out on any loading failure.
    try:
        model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            MODEL_PATH,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            trust_remote_code=True
        )
        processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
    except Exception as e:
        print(f"Error loading model: {e}")
        return

    # 2. Check the demo image exists before doing any work.
    if not os.path.exists(IMAGE_PATH):
        print(f"Warning: Image not found at '{IMAGE_PATH}'. Please edit IMAGE_PATH in demo.py")
        return

    # 3. Build a single-turn chat message: one image plus the text prompt.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": IMAGE_PATH},
                {"type": "text", "text": PROMPT},
            ],
        }
    ]

    print("Processing...")
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)

    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    ).to(model.device)

    # 4. Generate (sampling parameters match model_utils.SkinGPTModel).
    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=1024,
            temperature=0.7,
            repetition_penalty=1.2,
            no_repeat_ngram_size=3,
            top_p=0.9,
            do_sample=True
        )

    # 5. Decode only the newly generated tokens. BUG FIX: previously the
    # whole sequence (prompt included) was decoded, so the printed
    # "Diagnosis Result" echoed the prompt — inconsistent with
    # SkinGPTModel.generate_response, which trims the input ids.
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )

    print("\n=== Diagnosis Result ===")
    print(output_text[0])
    print("========================")

if __name__ == "__main__":
    main()
inference/inference.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import argparse
3
+ from model_utils import SkinGPTModel
4
+ import os
5
+
6
def main():
    """CLI entry point: diagnose a single skin image with SkinGPT-R1.

    Flags: --image (required path), --model_path (checkpoint directory),
    --prompt (instruction text).
    """
    parser = argparse.ArgumentParser(description="SkinGPT-R1 Single Inference")
    parser.add_argument("--image", type=str, required=True, help="Path to the image")
    parser.add_argument("--model_path", type=str, default="../checkpoint")
    parser.add_argument("--prompt", type=str, default="Please analyze this skin image and provide a diagnosis.")
    args = parser.parse_args()

    if not os.path.exists(args.image):
        print(f"Error: Image not found at {args.image}")
        return

    # 1. Load the model via model_utils so the transformers loading
    #    boilerplate lives in exactly one place.
    bot = SkinGPTModel(args.model_path)

    # 2. Assemble a single-turn conversation: the system instruction is
    #    prepended to the user prompt, alongside the image.
    system_prompt = "You are a professional AI dermatology assistant."
    user_content = [
        {"type": "image", "image": args.image},
        {"type": "text", "text": f"{system_prompt}\n\n{args.prompt}"}
    ]
    messages = [{"role": "user", "content": user_content}]

    # 3. Run inference and print the framed result.
    print(f"\nAnalyzing {args.image}...")
    response = bot.generate_response(messages)

    separator = "-" * 40
    print(separator)
    print("Result:")
    print(response)
    print(separator)

if __name__ == "__main__":
    main()
inference/model_utils.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model_utils.py
2
+ import torch
3
+ from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer
4
+ from qwen_vl_utils import process_vision_info
5
+ from PIL import Image
6
+ import os
7
+ from threading import Thread
8
+
9
class SkinGPTModel:
    """Wrapper around the fine-tuned Qwen2.5-VL checkpoint used by SkinGPT.

    Selects a device (CUDA / MPS / CPU), loads the model and processor once
    in __init__, and exposes a blocking helper (generate_response) and a
    streaming helper (generate_response_stream).
    """

    def __init__(self, model_path, device=None):
        # Honour an explicit device choice; otherwise prefer CUDA over CPU.
        self.model_path = model_path
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Loading model from {model_path} on {self.device}...")

        self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            model_path,
            # bfloat16 on accelerators; full float32 precision on CPU.
            torch_dtype=torch.bfloat16 if self.device != "cpu" else torch.float32,
            # flash-attention is only available on CUDA builds.
            attn_implementation="flash_attention_2" if self.device == "cuda" else None,
            # device_map="auto" is not used for MPS; the model is moved
            # there manually below instead.
            device_map="auto" if self.device != "mps" else None,
            trust_remote_code=True
        )

        if self.device == "mps":
            self.model = self.model.to(self.device)

        self.processor = AutoProcessor.from_pretrained(
            model_path,
            trust_remote_code=True,
            # Bound the visual token budget (units of 28x28-pixel patches).
            min_pixels=256*28*28,
            max_pixels=1280*28*28
        )
        print("Model loaded successfully.")

    def generate_response(self, messages, max_new_tokens=1024, temperature=0.7, repetition_penalty=1.2, no_repeat_ngram_size=3):
        """
        Run one blocking generation over a multi-turn message history.

        messages format:
        [
            {'role': 'user', 'content': [{'type': 'image', 'image': 'path...'}, {'type': 'text', 'text': '...'}]},
            {'role': 'assistant', 'content': [{'type': 'text', 'text': '...'}]}
        ]

        Returns the decoded assistant reply with the prompt tokens stripped.
        """
        # Render the chat template into a single prompt string.
        text = self.processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        # Collect the image/video inputs referenced by the messages.
        image_inputs, video_inputs = process_vision_info(messages)

        inputs = self.processor(
            text=[text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt",
        ).to(self.model.device)

        with torch.no_grad():
            generated_ids = self.model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                repetition_penalty=repetition_penalty,
                no_repeat_ngram_size=no_repeat_ngram_size,
                top_p=0.9,
                do_sample=True
            )

        # Decode only the newly generated tokens (drop the prompt prefix).
        generated_ids_trimmed = [
            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        output_text = self.processor.batch_decode(
            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )

        return output_text[0]

    def generate_response_stream(self, messages, max_new_tokens=1024, temperature=0.7, repetition_penalty=1.2, no_repeat_ngram_size=3):
        """
        Stream a generation response.

        Returns a generator that yields decoded text chunks one by one.
        Accepts the same message format as generate_response.
        """
        # Render the chat template into a single prompt string.
        text = self.processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        # Collect the image/video inputs referenced by the messages.
        image_inputs, video_inputs = process_vision_info(messages)

        inputs = self.processor(
            text=[text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt",
        ).to(self.model.device)

        # TextIteratorStreamer turns generate() output into an iterator of
        # text chunks; skip_prompt drops the echoed input tokens.
        streamer = TextIteratorStreamer(
            self.processor.tokenizer,
            skip_prompt=True,
            skip_special_tokens=True
        )

        # Prepare generation parameters (sampling settings mirror
        # generate_response, plus the streamer sink).
        generation_kwargs = {
            **inputs,
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
            "repetition_penalty": repetition_penalty,
            "no_repeat_ngram_size": no_repeat_ngram_size,
            "top_p": 0.9,
            "do_sample": True,
            "streamer": streamer,
        }

        # generate() blocks, so run it on a worker thread while this
        # generator consumes chunks from the streamer.
        thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
        thread.start()

        # Yield chunks as they arrive.
        for text_chunk in streamer:
            yield text_chunk

        thread.join()
inference/temp_uploads/.ipynb_checkpoints/temp_d2b1c6f9a43940d2812f10a8cc8bc3ef-checkpoint.jpg ADDED
inference/temp_uploads/.ipynb_checkpoints/user_1769671453128_43ccc61bfcb64c6bbbabbadfa887591c-checkpoint.jpg ADDED
inference/temp_uploads/temp_d2b1c6f9a43940d2812f10a8cc8bc3ef.jpg ADDED
inference/temp_uploads/user_1769671453128_43ccc61bfcb64c6bbbabbadfa887591c.jpg ADDED