diabolic6045 committed on
Commit
c9f8b16
·
verified ·
1 Parent(s): d70e615

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -14
app.py CHANGED
@@ -11,7 +11,6 @@ import io
11
  from PIL import Image
12
  from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
13
  from qwen_vl_utils import process_vision_info
14
- from peft import PeftModel
15
  import os
16
  import logging
17
  import spaces
@@ -22,13 +21,12 @@ logger = logging.getLogger(__name__)
22
 
23
 
24
  # Load model at module level (global scope)
25
- model_path = 'Qwen/Qwen2.5-VL-7B-Instruct'
26
- adapter_path = './outputs/out-qwen2-5-vl'
27
 
28
  logger.info("Loading processor...")
29
  processor = AutoProcessor.from_pretrained(model_path)
30
 
31
- logger.info("Loading base model...")
32
  # Check if CUDA is available, otherwise use CPU
33
  device_map = "auto" if torch.cuda.is_available() else "cpu"
34
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
@@ -37,12 +35,6 @@ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
37
  device_map=device_map
38
  )
39
 
40
- if adapter_path and os.path.exists(adapter_path):
41
- logger.info("Loading LoRA adapters...")
42
- model = PeftModel.from_pretrained(model, adapter_path)
43
- else:
44
- logger.info("No adapter path found, using base model only")
45
-
46
  model.eval()
47
  device = next(model.parameters()).device
48
  logger.info(f"Model loaded on device: {device}")
@@ -134,7 +126,7 @@ def create_gradio_interface():
134
  gr.HTML("""
135
  <div class="main-header">
136
  <h1>πŸ•‰οΈ Sanskrit Text Transcription</h1>
137
- <p>Upload an image containing Sanskrit text and get an accurate transcription using AI</p>
138
  <p><strong>🚀 Powered by ZeroGPU:</strong> Dynamic GPU allocation for efficient processing</p>
139
  </div>
140
  """)
@@ -189,13 +181,13 @@ def create_gradio_interface():
189
  check_status_btn = gr.Button("🔄 Check Model Status", size="sm")
190
 
191
  gr.Markdown("""
192
- **Model:** Qwen2.5-VL-7B-Instruct with LoRA fine-tuning
193
 
194
  **Features:**
195
  - Multimodal vision-language model
196
- - Fine-tuned on Sanskrit text data
197
  - Supports various Sanskrit scripts
198
- - High accuracy transcription
199
  """)
200
 
201
 
 
11
  from PIL import Image
12
  from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
13
  from qwen_vl_utils import process_vision_info
 
14
  import os
15
  import logging
16
  import spaces
 
21
 
22
 
23
  # Load model at module level (global scope)
24
+ model_path = 'diabolic6045/Sanskrit-Qwen2.5-VL-7B-Instruct-OCR'
 
25
 
26
  logger.info("Loading processor...")
27
  processor = AutoProcessor.from_pretrained(model_path)
28
 
29
+ logger.info("Loading Sanskrit OCR model...")
30
  # Check if CUDA is available, otherwise use CPU
31
  device_map = "auto" if torch.cuda.is_available() else "cpu"
32
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
 
35
  device_map=device_map
36
  )
37
 
 
 
 
 
 
 
38
  model.eval()
39
  device = next(model.parameters()).device
40
  logger.info(f"Model loaded on device: {device}")
 
126
  gr.HTML("""
127
  <div class="main-header">
128
  <h1>πŸ•‰οΈ Sanskrit Text Transcription</h1>
129
+ <p>Upload an image containing Sanskrit text and get an accurate transcription using the specialized Sanskrit OCR model</p>
130
  <p><strong>🚀 Powered by ZeroGPU:</strong> Dynamic GPU allocation for efficient processing</p>
131
  </div>
132
  """)
 
181
  check_status_btn = gr.Button("🔄 Check Model Status", size="sm")
182
 
183
  gr.Markdown("""
184
+ **Model:** diabolic6045/Sanskrit-Qwen2.5-VL-7B-Instruct-OCR
185
 
186
  **Features:**
187
  - Multimodal vision-language model
188
+ - Pre-trained specifically for Sanskrit OCR
189
  - Supports various Sanskrit scripts
190
+ - High accuracy Sanskrit text transcription
191
  """)
192
 
193