Spaces:
Runtime error
Runtime error
| import os | |
| import torch | |
| import gradio as gr | |
| import pdfplumber | |
| import docx | |
| from transformers import pipeline | |
# Load Hugging Face token from environment (None when HF_TOKEN is unset;
# presumably required because the model repo is access-gated - confirm).
hf_token = os.environ.get("HF_TOKEN")

# Model ID (Gemma Instruct)
model_id = "google/gemma-7b-it"

# Detect device: the pipeline API uses 0 for the first CUDA GPU and -1 for CPU.
device = 0 if torch.cuda.is_available() else -1
# Half precision only on GPU; CPU inference stays in float32.
torch_dtype = torch.float16 if device == 0 else torch.float32

# Load the instruction-following pipeline.
# `token` is the supported name for the authentication argument; the older
# `use_auth_token` spelling is deprecated in transformers.
pipe = pipeline(
    "text-generation",
    model=model_id,
    tokenizer=model_id,
    token=hf_token,
    device=device,
    torch_dtype=torch_dtype,
    max_new_tokens=1024,
)
# Extract text from PDF
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, one page per line-terminated block.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            object exposing the path as ``.name`` (e.g. a temp-file wrapper).
            NOTE(review): which of the two the UI layer passes depends on its
            version/configuration - both are accepted here.

    Returns:
        The extracted text of all pages that yielded any text, each followed
        by a newline. Pages without extractable text are skipped, so the
        result is an empty string when nothing could be extracted.
    """
    # Accept a bare path as well as a file wrapper; reading `.name`
    # unconditionally fails with AttributeError on a plain string.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    with pdfplumber.open(path) as pdf:
        page_texts = [page.extract_text() for page in pdf.pages]

    # extract_text() may give None/"" for a page; keep only non-empty results.
    return "".join(f"{page_text}\n" for page_text in page_texts if page_text)
# Extract text from DOCX
def extract_text_from_docx(file):
    """Return the non-blank paragraphs of a DOCX document joined by newlines.

    ``file`` is handed straight to ``docx.Document``. Paragraphs whose text is
    empty or whitespace-only are dropped; the remaining paragraph texts are
    kept verbatim (not stripped).
    """
    document = docx.Document(file)
    kept_paragraphs = []
    for paragraph in document.paragraphs:
        if not paragraph.text.strip():
            continue
        kept_paragraphs.append(paragraph.text)
    return "\n".join(kept_paragraphs)
# Summarize document contents
_CHUNK_CHARS = 1500  # characters of document text placed in each prompt
_MAX_CHUNKS = 3  # only the leading chunks of the document are summarized
_UNSUPPORTED_MESSAGE = "β Please upload a PDF or DOCX file."
_PROMPT_TEMPLATE = (
    "Read the following technical/tender document chunk and extract these key points:\n"
    "1. Number of workers or people required\n"
    "2. Timeline or duration for project completion\n"
    "3. Technologies, tools, or machines mentioned\n"
    "4. Any deadlines, conditions, or legal terms\n"
    "Document chunk:\n"
    "{chunk}\n"
    "Please return only the extracted information in clean bullet points."
)


def summarize_document(file):
    """Extract key tender facts from the first chunks of a PDF or DOCX upload.

    The document text is cut into ``_CHUNK_CHARS``-character chunks; the first
    ``_MAX_CHUNKS`` of them are each sent to the text-generation pipeline and
    the answers are concatenated under ``--- Chunk N ---`` headings.

    Args:
        file: The uploaded file - a path (``str`` / ``os.PathLike``), an
            object exposing its path as ``.name``, or ``None`` when nothing
            was uploaded. It is passed unchanged to the extractor functions.

    Returns:
        The combined per-chunk answers, or a prompt to upload a PDF/DOCX when
        no file was given or the extension is not supported. An empty string
        is returned when the document contains no extractable text.
    """
    # Submitting without an upload yields None; answer politely, don't crash.
    if file is None:
        return _UNSUPPORTED_MESSAGE

    if isinstance(file, (str, os.PathLike)):
        filename = str(os.fspath(file))
    else:
        filename = str(file.name)

    # Compare case-insensitively so "REPORT.PDF" is accepted as well.
    lowered = filename.lower()
    if lowered.endswith(".pdf"):
        full_text = extract_text_from_pdf(file)
    elif lowered.endswith(".docx"):
        full_text = extract_text_from_docx(file)
    else:
        return _UNSUPPORTED_MESSAGE

    # Build only the chunks that will actually be summarized.
    text_limit = min(len(full_text), _CHUNK_CHARS * _MAX_CHUNKS)
    chunks = [
        full_text[start:start + _CHUNK_CHARS]
        for start in range(0, text_limit, _CHUNK_CHARS)
    ]

    sections = []
    for number, chunk in enumerate(chunks, start=1):
        prompt = _PROMPT_TEMPLATE.format(chunk=chunk)
        # return_full_text=False makes the pipeline return only the newly
        # generated text. Without it the prompt is echoed back, and the raw
        # document chunk would end up inside the "summary".
        generated = pipe(prompt, return_full_text=False)[0]["generated_text"]
        sections.append(f"--- Chunk {number} ---\n{generated.strip()}")

    return "\n\n".join(sections).strip()
# Gradio Interface: one file upload in, one large text box out.
_upload_box = gr.File(label="π Upload Tender Document (PDF or DOCX)")
_summary_box = gr.Textbox(label="π§Ύ Extracted Summary", lines=30)

iface = gr.Interface(
    fn=summarize_document,
    inputs=_upload_box,
    outputs=_summary_box,
    title="π Smart Tender Analyzer (Gemma-7B)",
    description="Upload a tender or technical document (PDF/DOCX). This app extracts important project info using Google's Gemma-7B.",
)

# Launch app (no share=True for Hugging Face Spaces)
iface.launch()