RajanMalaviya committed on
Commit
bc73dcf
·
verified ·
1 Parent(s): 3687ca5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -28,8 +28,11 @@ logger = logging.getLogger(__name__)
28
  # Set Tesseract path
29
  pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
30
 
31
- # Load Qwen2-VL-2B-Instruct model on CPU
32
- model_name = "Qwen/Qwen2-VL-2B-Instruct"
 
 
 
33
  try:
34
  model = Qwen2VLForConditionalGeneration.from_pretrained(
35
  model_name,
@@ -38,10 +41,10 @@ try:
38
  low_cpu_mem_usage=True
39
  )
40
  processor = AutoProcessor.from_pretrained(model_name)
41
- logger.info("Qwen2-VL-2B-Instruct model loaded successfully")
42
  except Exception as e:
43
- logger.error(f"Failed to load Qwen2-VL-2B-Instruct model: {str(e)}")
44
- raise HTTPException(status_code=500, detail="Failed to load Qwen2-VL-2B-Instruct model")
45
 
46
  # In-memory caches (1-hour TTL)
47
  raw_text_cache = cachetools.TTLCache(maxsize=100, ttl=3600)
@@ -95,7 +98,7 @@ async def process_pdf_page(img, page_idx):
95
  return ""
96
 
97
  async def process_with_qwen(filename: str, raw_text: str):
98
- """Process raw text with Qwen2-VL-2B-Instruct to extract structured data."""
99
  start_time = time.time()
100
  logger.info(f"Starting Qwen processing for {filename}, {log_memory_usage()}")
101
 
 
28
  # Set Tesseract path
29
  pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
30
 
31
+ # Set cache directory for Hugging Face
32
+ os.environ["HF_HOME"] = "/app/cache"
33
+
34
+ # Load Qwen2.5-VL-2B-Instruct model on CPU
35
+ model_name = "Qwen/Qwen2.5-VL-2B-Instruct"
36
  try:
37
  model = Qwen2VLForConditionalGeneration.from_pretrained(
38
  model_name,
 
41
  low_cpu_mem_usage=True
42
  )
43
  processor = AutoProcessor.from_pretrained(model_name)
44
+ logger.info("Qwen2.5-VL-2B-Instruct model loaded successfully")
45
  except Exception as e:
46
+ logger.error(f"Failed to load Qwen2.5-VL-2B-Instruct model: {str(e)}")
47
+ raise HTTPException(status_code=500, detail="Failed to load Qwen2.5-VL-2B-Instruct model")
48
 
49
  # In-memory caches (1-hour TTL)
50
  raw_text_cache = cachetools.TTLCache(maxsize=100, ttl=3600)
 
98
  return ""
99
 
100
  async def process_with_qwen(filename: str, raw_text: str):
101
+ """Process raw text with Qwen2.5-VL-2B-Instruct to extract structured data."""
102
  start_time = time.time()
103
  logger.info(f"Starting Qwen processing for {filename}, {log_memory_usage()}")
104