# get-info / app.py
# sejalkishan's picture
# Update app.py
# c167b1b verified
import os
import torch
import gradio as gr
import pdfplumber
import docx
from transformers import pipeline
# Load Hugging Face token from environment (None when HF_TOKEN is unset)
hf_token = os.environ.get("HF_TOKEN")

# Model ID (Gemma Instruct)
model_id = "google/gemma-7b-it"

# Detect device: index 0 selects the first CUDA GPU, -1 keeps the pipeline on CPU
device = 0 if torch.cuda.is_available() else -1
# Half precision is only chosen when running on the GPU; CPU uses float32
torch_dtype = torch.float16 if device == 0 else torch.float32

# Load the instruction-following pipeline
pipe = pipeline(
    "text-generation",
    model=model_id,
    tokenizer=model_id,
    # `token` replaces the deprecated `use_auth_token` keyword.
    token=hf_token,
    device=device,
    torch_dtype=torch_dtype,
    max_new_tokens=1024,
)
# Extract text from PDF
def extract_text_from_pdf(file):
    """Return the concatenated text of every page of a PDF.

    Args:
        file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            object whose ``name`` attribute holds the path of the PDF.

    Returns:
        The text of each page that yields any text, each followed by a
        newline. Pages for which ``extract_text()`` returns nothing are
        skipped, so the result is ``""`` when no page has text.
    """
    # A pathlib.Path also has a `.name`, but it is only the final path
    # component, so real paths must be recognised before falling back to it.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    with pdfplumber.open(path) as pdf:
        page_texts = (page.extract_text() for page in pdf.pages)
        # Join while the PDF is still open: the generator reads pages lazily.
        return "".join(f"{page_text}\n" for page_text in page_texts if page_text)
# Extract text from DOCX
def extract_text_from_docx(file):
    """Return the non-blank paragraphs of a DOCX document, one per line.

    Args:
        file: Passed straight to ``docx.Document``.

    Returns:
        The text of every paragraph that contains something other than
        whitespace, joined with newline characters.
    """
    document = docx.Document(file)
    kept = []
    for paragraph in document.paragraphs:
        content = paragraph.text
        # Whitespace-only paragraphs are dropped; kept ones are not trimmed.
        if content.strip():
            kept.append(content)
    return "\n".join(kept)
# Summarize document contents

# Number of characters of extracted text placed in each prompt.
_CHUNK_SIZE = 1500
# Only this many leading chunks are sent to the model.
_MAX_CHUNKS = 3
_UNSUPPORTED_MESSAGE = "❌ Please upload a PDF or DOCX file."
_NO_TEXT_MESSAGE = "❌ No readable text was found in the document."
_PROMPT_TEMPLATE = (
    "Read the following technical/tender document chunk and extract these key points:\n"
    "1. Number of workers or people required\n"
    "2. Timeline or duration for project completion\n"
    "3. Technologies, tools, or machines mentioned\n"
    "4. Any deadlines, conditions, or legal terms\n"
    "Document chunk:\n"
    "{chunk}\n"
    "Please return only the extracted information in clean bullet points."
)


def summarize_document(file):
    """Extract key project facts from the start of a PDF or DOCX document.

    The document text is cut into ``_CHUNK_SIZE``-character chunks and the
    first ``_MAX_CHUNKS`` of them are each sent to the module-level ``pipe``
    with a fixed extraction prompt.

    Args:
        file: The uploaded document: a path (``str`` / ``os.PathLike``), an
            object exposing the path as ``.name``, or ``None`` when nothing
            was uploaded. The same value is handed to the extractor.

    Returns:
        One ``--- Chunk N ---`` section per processed chunk, separated by
        blank lines, or a ``❌`` message when no file was given, the
        extension is neither ``.pdf`` nor ``.docx``, or no text was found.
    """
    if file is None:
        return _UNSUPPORTED_MESSAGE

    if isinstance(file, (str, os.PathLike)):
        filename = os.fspath(file)
    else:
        filename = file.name

    # Compare case-insensitively so "REPORT.PDF" is accepted too.
    lowered = filename.lower()
    if lowered.endswith(".pdf"):
        full_text = extract_text_from_pdf(file)
    elif lowered.endswith(".docx"):
        full_text = extract_text_from_docx(file)
    else:
        return _UNSUPPORTED_MESSAGE

    if not full_text.strip():
        return _NO_TEXT_MESSAGE

    # Slicing the range avoids building chunks that would never be used.
    chunk_starts = range(0, len(full_text), _CHUNK_SIZE)[:_MAX_CHUNKS]
    sections = []
    for number, start in enumerate(chunk_starts, start=1):
        chunk = full_text[start:start + _CHUNK_SIZE]
        prompt = _PROMPT_TEMPLATE.format(chunk=chunk)
        # Without return_full_text=False the pipeline echoes the prompt, which
        # put the raw document chunk and instructions into the summary.
        generated = pipe(prompt, return_full_text=False)[0]["generated_text"]
        sections.append(f"--- Chunk {number} ---\n{generated.strip()}")
    return "\n\n".join(sections).strip()
# Gradio Interface: one file upload in, one read-only text box out.
# The emoji in the upload label and title were mis-encoded (UTF-8 bytes shown
# through a legacy code page); they are restored to the intended characters.
iface = gr.Interface(
    fn=summarize_document,
    inputs=gr.File(label="📄 Upload Tender Document (PDF or DOCX)"),
    outputs=gr.Textbox(label="🧾 Extracted Summary", lines=30),
    title="📘 Smart Tender Analyzer (Gemma-7B)",
    description="Upload a tender or technical document (PDF/DOCX). This app extracts important project info using Google's Gemma-7B.",
)

# Launch app (no share=True for Hugging Face Spaces)
iface.launch()