Spaces:

sejalkishan
/

doc-sum

Build error

App Files Files Community

sejalkishan committed on Jul 12, 2025

Commit

8ac0d0d

verified ·

1 Parent(s): 5aa66ed

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -10

app.py CHANGED Viewed

@@ -7,18 +7,19 @@ from PIL import Image
 import pytesseract
 import torch
 import os
-# 🔐 Authenticate with Hugging Face token
-login(token=os.environ.get("HF_TOKEN"))
-# Check for GPU
 if not torch.cuda.is_available():
     raise RuntimeError("❌ GPU not detected! Please enable GPU in Space settings.")
-# Model
 model_id = "mistralai/Mistral-7B-Instruct-v0.2"
-# ⛏️ OCR Fallback for image-based PDFs
 def extract_text_from_pdf(file):
     text = ""
     with pdfplumber.open(file) as pdf:
@@ -49,7 +50,7 @@ def chunk_text(text, max_chars=6000):
         chunks.append(current_chunk)
     return chunks
-# 📋 US Tender Q&A prompt
 def create_prompt(text_chunk):
     return f"""You are an expert in analyzing U.S. government tenders. For the following document chunk, extract answers to the listed questions. Format your response in Q&A.
@@ -89,7 +90,8 @@ DOCUMENT CHUNK:
 {text_chunk}
 """
-# 🧠 Main analyzer
 def analyze_document(file, status_box, cancel_flag):
     filename = file.name
     ext = os.path.splitext(filename)[-1].lower()
@@ -107,12 +109,12 @@ def analyze_document(file, status_box, cancel_flag):
     chunks = chunk_text(raw_text)
     full_summary = ""
-    tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ.get("HF_TOKEN"))
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         device_map="auto",
         torch_dtype=torch.float16,
-        token=os.environ.get("HF_TOKEN"),
         trust_remote_code=True
     )
     generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
@@ -131,9 +133,10 @@ def analyze_document(file, status_box, cancel_flag):
     status_box.value = "✅ Completed"
     return full_summary
-# 🎨 Gradio UI
 with gr.Blocks(title="Smart Tender Analyzer - US Edition") as demo:
     gr.Markdown("## 📄 US Tender Analyzer – Document Intelligence (OCR + GPU-Accelerated)")
     with gr.Row():
         with gr.Column(scale=1):
             file_input = gr.File(label="📎 Upload Tender Document (PDF/DOCX)")

 import pytesseract
 import torch
 import os
+import spaces
+# 🔐 Hugging Face Token Authentication
+login(token=os.environ.get("token"))
+# 🔍 Check GPU availability
 if not torch.cuda.is_available():
     raise RuntimeError("❌ GPU not detected! Please enable GPU in Space settings.")
+# 🧠 Model details
 model_id = "mistralai/Mistral-7B-Instruct-v0.2"
+# 📄 OCR + Text Extractor
 def extract_text_from_pdf(file):
     text = ""
     with pdfplumber.open(file) as pdf:
         chunks.append(current_chunk)
     return chunks
+# 🧾 Prompt template for US tender evaluation
 def create_prompt(text_chunk):
     return f"""You are an expert in analyzing U.S. government tenders. For the following document chunk, extract answers to the listed questions. Format your response in Q&A.
 {text_chunk}
 """
+# ✅ Main analyzer (decorated with @spaces.GPU)
+@spaces.GPU(duration=600)  # Allocate GPU for up to 10 minutes
 def analyze_document(file, status_box, cancel_flag):
     filename = file.name
     ext = os.path.splitext(filename)[-1].lower()
     chunks = chunk_text(raw_text)
     full_summary = ""
+    tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ.get("token"))
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         device_map="auto",
         torch_dtype=torch.float16,
+        token=os.environ.get("token"),
         trust_remote_code=True
     )
     generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
     status_box.value = "✅ Completed"
     return full_summary
+# 🌐 Gradio UI
 with gr.Blocks(title="Smart Tender Analyzer - US Edition") as demo:
     gr.Markdown("## 📄 US Tender Analyzer – Document Intelligence (OCR + GPU-Accelerated)")
     with gr.Row():
         with gr.Column(scale=1):
             file_input = gr.File(label="📎 Upload Tender Document (PDF/DOCX)")