core-OCR

Paused

prithivMLmods committed on Feb 10

Commit

4890db6

verified ·

1 Parent(s): d5fdad9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -48,9 +48,14 @@ class Model:
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.pipe = ShapEPipeline.from_pretrained("openai/shap-e", torch_dtype=torch.float16)
         self.pipe.to(self.device)
         self.pipe_img = ShapEImg2ImgPipeline.from_pretrained("openai/shap-e-img2img", torch_dtype=torch.float16)
         self.pipe_img.to(self.device)
     def to_glb(self, ply_path: str) -> str:
         mesh = trimesh.load(ply_path)
@@ -288,7 +293,7 @@ def generate(
 ):
     """
     Generates chatbot responses with support for multimodal input, TTS, image generation,
-    and now 3D generation.
     Special commands:
       - "@tts1" or "@tts2": triggers text-to-speech.

         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.pipe = ShapEPipeline.from_pretrained("openai/shap-e", torch_dtype=torch.float16)
         self.pipe.to(self.device)
+        # Ensure the text encoder is in half precision to avoid dtype mismatches.
+        if torch.cuda.is_available():
+            self.pipe.text_encoder = self.pipe.text_encoder.half()
         self.pipe_img = ShapEImg2ImgPipeline.from_pretrained("openai/shap-e-img2img", torch_dtype=torch.float16)
         self.pipe_img.to(self.device)
+        if torch.cuda.is_available():
+            self.pipe_img.text_encoder = self.pipe_img.text_encoder.half()
     def to_glb(self, ply_path: str) -> str:
         mesh = trimesh.load(ply_path)
 ):
     """
     Generates chatbot responses with support for multimodal input, TTS, image generation,
+    and 3D model generation.
     Special commands:
       - "@tts1" or "@tts2": triggers text-to-speech.