Spaces:

sejalkishan
/

get-info

Runtime error

App Files Files Community

get-info / app.py

sejalkishan

Update app.py

c167b1b verified 10 months ago

raw

history blame contribute delete

2.58 kB

	import os
	import torch
	import gradio as gr
	import pdfplumber
	import docx
	from transformers import pipeline

	# Hugging Face access token, read from the environment.
	# os.environ.get returns None when HF_TOKEN is not set.
	hf_token = os.environ.get("HF_TOKEN")

	# Model ID (Gemma Instruct)
	model_id = "google/gemma-7b-it"

	# Device index handed to the pipeline: 0 when CUDA is available, -1 otherwise.
	device = 0 if torch.cuda.is_available() else -1
	# float16 only when running on the CUDA device; float32 in every other case.
	torch_dtype = torch.float16 if device == 0 else torch.float32

	# Load the instruction-following pipeline once, at import time.
	# `token` is the current keyword for passing the access token;
	# `use_auth_token` is its deprecated predecessor.
	pipe = pipeline(
	    "text-generation",
	    model=model_id,
	    tokenizer=model_id,
	    token=hf_token,
	    device=device,
	    torch_dtype=torch_dtype,
	    max_new_tokens=1024,
	)

	# Extract text from PDF
	def extract_text_from_pdf(file):
	    """Return the text of every page of a PDF, one page per line block.

	    Args:
	        file: The PDF to read. Either a filesystem path (``str`` or
	            ``os.PathLike``) or an object that exposes the path as its
	            ``.name`` attribute.

	    Returns:
	        The concatenated page texts, each followed by a newline. Pages for
	        which ``extract_text()`` yields nothing are skipped, so a PDF with
	        no extractable text produces an empty string.
	    """
	    # Path objects have a `.name` too, but it is only the final path
	    # component, so they must be converted before the attribute lookup.
	    if isinstance(file, os.PathLike):
	        pdf_path = os.fspath(file)
	    else:
	        pdf_path = getattr(file, "name", file)

	    with pdfplumber.open(pdf_path) as pdf:
	        page_texts = (page.extract_text() for page in pdf.pages)
	        # Joined while the PDF is still open: the generator reads pages lazily.
	        return "".join(f"{page_text}\n" for page_text in page_texts if page_text)

	# Extract text from DOCX
	def extract_text_from_docx(file):
	    """Return the non-blank paragraphs of a DOCX document, one per line.

	    Args:
	        file: The document to read. Either a filesystem path (``str`` or
	            ``os.PathLike``), an object that exposes the path as its
	            ``.name`` attribute, or a binary stream without ``.name``.

	    Returns:
	        The text of every paragraph that is not empty or whitespace-only,
	        joined with newline characters.
	    """
	    # Open the document by its on-disk path, the same way the PDF helper in
	    # this file does, instead of handing the upload object itself to
	    # python-docx.
	    # NOTE(review): some Gradio versions appear to return a wrapper whose own
	    # stream is not the uploaded content; confirm against the installed version.
	    if isinstance(file, os.PathLike):
	        source = os.fspath(file)
	    else:
	        # Falls back to the object itself for plain strings and for streams
	        # that carry no `.name`.
	        source = getattr(file, "name", file)

	    document = docx.Document(source)
	    return "\n".join(
	        paragraph.text for paragraph in document.paragraphs if paragraph.text.strip()
	    )

	# Summarize document contents
	def summarize_document(file):
	    """Extract key tender facts from an uploaded PDF or DOCX document.

	    The document text is cut into 1500-character chunks and only the first
	    three chunks (at most 4500 characters) are sent to the model, one prompt
	    per chunk.

	    Args:
	        file: The uploaded document: a filesystem path, an object exposing
	            the path as ``.name``, or ``None`` when nothing was uploaded.

	    Returns:
	        One "--- Chunk N ---" section per processed chunk containing the
	        model's answer, or a message starting with "❌" when no supported
	        file was supplied or no text could be extracted.
	    """
	    unsupported_message = "❌ Please upload a PDF or DOCX file."
	    chunk_size = 1500  # characters of document text per prompt
	    max_chunks = 3  # only the leading chunks are analysed

	    # The function can be invoked before any file has been chosen.
	    if file is None:
	        return unsupported_message

	    # Work out a file name for the extension check. Path objects are
	    # converted first because their `.name` is only the last path component.
	    if isinstance(file, os.PathLike):
	        filename = os.fspath(file)
	    else:
	        filename = getattr(file, "name", file)
	    # Lower-cased so that "REPORT.PDF" is accepted like "report.pdf".
	    filename = str(filename).lower()

	    # The original upload object is forwarded untouched to the extractors.
	    if filename.endswith(".pdf"):
	        full_text = extract_text_from_pdf(file)
	    elif filename.endswith(".docx"):
	        full_text = extract_text_from_docx(file)
	    else:
	        return unsupported_message

	    # E.g. a PDF whose pages yield no extractable text: nothing to prompt with.
	    if not full_text.strip():
	        return "❌ No readable text was found in the document."

	    # Slice only as far as the chunks that will actually be used.
	    analysed_length = min(len(full_text), chunk_size * max_chunks)
	    chunks = [
	        full_text[start:start + chunk_size]
	        for start in range(0, analysed_length, chunk_size)
	    ]

	    sections = []
	    for index, chunk in enumerate(chunks, start=1):
	        prompt = f"""Read the following technical/tender document chunk and extract these key points:

	1. Number of workers or people required
	2. Timeline or duration for project completion
	3. Technologies, tools, or machines mentioned
	4. Any deadlines, conditions, or legal terms

	Document chunk:
	{chunk}

	Please return only the extracted information in clean bullet points."""
	        # Ask for the completion only. By default the pipeline echoes the
	        # prompt, and splitting on "Document chunk:" still left the chunk text
	        # and the closing instruction in the output.
	        generated = pipe(prompt, return_full_text=False)[0]["generated_text"]
	        sections.append(f"--- Chunk {index} ---\n{generated.strip()}")

	    return "\n\n".join(sections).strip()

	# Gradio Interface
	# The two widgets are created first (upload box, then result box) and then
	# wired to summarize_document.
	_upload_box = gr.File(label="📄 Upload Tender Document (PDF or DOCX)")
	_summary_box = gr.Textbox(label="🧾 Extracted Summary", lines=30)

	iface = gr.Interface(
	    title="📘 Smart Tender Analyzer (Gemma-7B)",
	    description="Upload a tender or technical document (PDF/DOCX). This app extracts important project info using Google's Gemma-7B.",
	    fn=summarize_document,
	    inputs=_upload_box,
	    outputs=_summary_box,
	)

	# Launch app (no share=True for Hugging Face Spaces)
	iface.launch()