Spaces:

satvaSolutions
/

Transaction_Reconciliation

Sleeping

RajanMalaviya committed on May 9, 2025

Commit

bc73dcf

verified ·

1 Parent(s): 3687ca5

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -28,8 +28,11 @@ logger = logging.getLogger(__name__)
 # Set Tesseract path
 pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
-# Load Qwen2-VL-2B-Instruct model on CPU
-model_name = "Qwen/Qwen2-VL-2B-Instruct"
 try:
     model = Qwen2VLForConditionalGeneration.from_pretrained(
         model_name,
@@ -38,10 +41,10 @@ try:
         low_cpu_mem_usage=True
     )
     processor = AutoProcessor.from_pretrained(model_name)
-    logger.info("Qwen2-VL-2B-Instruct model loaded successfully")
 except Exception as e:
-    logger.error(f"Failed to load Qwen2-VL-2B-Instruct model: {str(e)}")
-    raise HTTPException(status_code=500, detail="Failed to load Qwen2-VL-2B-Instruct model")
 # In-memory caches (1-hour TTL)
 raw_text_cache = cachetools.TTLCache(maxsize=100, ttl=3600)
@@ -95,7 +98,7 @@ async def process_pdf_page(img, page_idx):
         return ""
 async def process_with_qwen(filename: str, raw_text: str):
-    """Process raw text with Qwen2-VL-2B-Instruct to extract structured data."""
     start_time = time.time()
     logger.info(f"Starting Qwen processing for {filename}, {log_memory_usage()}")

 # Set Tesseract path
 pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
+# Set cache directory for Hugging Face
+os.environ["HF_HOME"] = "/app/cache"
+# Load Qwen2.5-VL-2B-Instruct model on CPU
+model_name = "Qwen/Qwen2.5-VL-2B-Instruct"
 try:
     model = Qwen2VLForConditionalGeneration.from_pretrained(
         model_name,
         low_cpu_mem_usage=True
     )
     processor = AutoProcessor.from_pretrained(model_name)
+    logger.info("Qwen2.5-VL-2B-Instruct model loaded successfully")
 except Exception as e:
+    logger.error(f"Failed to load Qwen2.5-VL-2B-Instruct model: {str(e)}")
+    raise HTTPException(status_code=500, detail="Failed to load Qwen2.5-VL-2B-Instruct model")
 # In-memory caches (1-hour TTL)
 raw_text_cache = cachetools.TTLCache(maxsize=100, ttl=3600)
         return ""
 async def process_with_qwen(filename: str, raw_text: str):
+    """Process raw text with Qwen2.5-VL-2B-Instruct to extract structured data."""
     start_time = time.time()
     logger.info(f"Starting Qwen processing for {filename}, {log_memory_usage()}")