Upload 2 files
- backend/main.py +246 -0
- backend/utils.py +96 -0
backend/main.py
ADDED
@@ -0,0 +1,246 @@
from fastapi import FastAPI, File, UploadFile, HTTPException, Form
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
import shutil
from pathlib import Path
from transformers import (
    pipeline,
    AutoProcessor,
    AutoModelForVision2Seq,
    M2M100ForConditionalGeneration,
    M2M100Tokenizer,
)
from huggingface_hub import InferenceClient
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from utils import extract_text, save_file
import torch
import easyocr
from langdetect import detect, DetectorFactory  # for language detection

app = FastAPI()

# Initialize Hugging Face models
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
processor = AutoProcessor.from_pretrained("microsoft/kosmos-2-patch14-224")
image_captioner = AutoModelForVision2Seq.from_pretrained(
    "microsoft/kosmos-2-patch14-224",
    use_safetensors=True,
    trust_remote_code=True,
    torch_dtype=torch.float16,
)
device = "cuda" if torch.cuda.is_available() else "cpu"
image_captioner = image_captioner.to(device)

tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
translation_model = M2M100ForConditionalGeneration.from_pretrained(
    "facebook/m2m100_418M"
)
question_answering = pipeline(
    "question-answering", model="bert-large-uncased-whole-word-masking-finetuned-squad"
)

# Make language detection deterministic across requests
DetectorFactory.seed = 0


# Directories to store uploaded and processed files
UPLOAD_DIR = Path("uploads")
PROCESSED_DIR = Path("processed")
UPLOAD_DIR.mkdir(exist_ok=True)
PROCESSED_DIR.mkdir(exist_ok=True)

app.mount(
    "/assets", StaticFiles(directory="../frontend/assets", html=True), name="assets"
)
app.mount("/processed", StaticFiles(directory="processed"), name="processed")


@app.get("/")
async def serve_frontend():
    return FileResponse("../frontend/index.html")


# List processed files
@app.get("/processed_files")
async def list_processed_files():
    files = [f.name for f in PROCESSED_DIR.iterdir() if f.is_file()]
    return {"files": files}


# Download a processed file
@app.get("/download/{filename}")
async def download_file(filename: str):
    file_path = PROCESSED_DIR / filename
    if not file_path.exists():
        raise HTTPException(status_code=404, detail="File not found")
    return FileResponse(file_path, filename=filename)


# Document & Image Analysis (Summarization & Interpretation)
@app.post("/docsum_imginter")
async def docsum_imginter(file: UploadFile = File(...), task: str = Form(...)):
    file_type = file.filename.split(".")[-1].lower()
    file_path = UPLOAD_DIR / file.filename
    output_filename = f"summarized_{file.filename}"
    output_path = PROCESSED_DIR / output_filename

    # Save the uploaded file
    with open(file_path, "wb") as f:
        shutil.copyfileobj(file.file, f)

    if file_type in ["docx", "xlsx", "pptx", "pdf", "txt"]:
        if task.lower() == "summarize":
            text = extract_text(file_path, file_type)
            if text is None:
                raise HTTPException(
                    status_code=400, detail="Failed to extract text from the document."
                )
            if not text.strip():
                raise HTTPException(
                    status_code=400, detail="No text found in the document."
                )

            original_word_count = len(text.split())

            if original_word_count < 150:
                return {
                    "warning": "Document too short for meaningful summarization",
                    "original_text": text,
                    "word_count": original_word_count,
                }

            target_length = max(original_word_count // 2, 150)

            summary = summarizer(
                "Generate a detailed technical summary (150-200 words): " + text,
                max_length=target_length,
                min_length=target_length,
                do_sample=False,
                truncation=True,
            )[0]["summary_text"]
            save_file(summary, file_path, file_type, output_path)
            return FileResponse(output_path, filename=output_filename)
        else:
            raise HTTPException(
                status_code=400,
                detail="Task not supported for documents. Use 'summarize'.",
            )
    elif file_type in ["png", "jpg", "jpeg"]:
        if task.lower() == "interpretation":
            image = Image.open(file_path)
            inputs = processor(
                text="Describe this image in detail including any text:",
                images=image,
                return_tensors="pt",
            ).to(device)

            generated_ids = image_captioner.generate(
                pixel_values=inputs["pixel_values"],
                input_ids=inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=200,
            )

            caption = processor.decode(generated_ids[0], skip_special_tokens=True)
            return {"caption": caption}
        else:
            raise HTTPException(
                status_code=400,
                detail="Task not supported for images. Use 'interpretation'.",
            )
    else:
        raise HTTPException(status_code=400, detail="Unsupported file type.")


# Intelligent Question Answering
@app.post("/ask")
async def ask(file: UploadFile = File(...), question: str = Form(...)):
    file_type = file.filename.split(".")[-1].lower()
    file_path = UPLOAD_DIR / file.filename
    reader = easyocr.Reader(["en"])

    with open(file_path, "wb") as f:
        shutil.copyfileobj(file.file, f)

    if file_type in ["docx", "xlsx", "pptx", "pdf", "txt"]:
        text = extract_text(file_path, file_type)

    elif file_type in ["png", "jpg", "jpeg"]:
        # OCR the saved image; detail=0 returns plain strings instead of (bbox, text, conf) tuples
        text = " ".join(reader.readtext(str(file_path), detail=0))

    else:
        raise HTTPException(status_code=400, detail="Unsupported file type.")

    if not text:
        raise HTTPException(
            status_code=400,
            detail="The file doesn't contain any text.",
        )

    result = question_answering(question=question, context=text)
    return {"answer": result["answer"]}


# Data Visualization Code Generation
@app.post("/generate-visualization")
async def visualization(file: UploadFile = File(...), request: str = Form(...)):
    return {"message": "Visualization is not implemented yet."}


# Text Translation
@app.post("/translate")
async def translate(file: UploadFile = File(...), target_language: str = Form(...)):
    file_type = file.filename.split(".")[-1].lower()
    file_path = UPLOAD_DIR / file.filename
    output_filename = f"translated_{file.filename}"
    output_path = PROCESSED_DIR / output_filename

    with open(file_path, "wb") as f:
        shutil.copyfileobj(file.file, f)

    try:
        text = extract_text(file_path, file_type)

        # Auto-detect the source language from the first 1000 characters
        source_language = detect(text[:1000])
        # Map detected codes to M2M100 language codes
        source_language = {
            "en": "en",
            "fr": "fr",
            "es": "es",
            "de": "de",
            "ar": "ar",
            "zh": "zh",
            "ja": "ja",
            "ru": "ru",
        }.get(source_language, source_language)

        # Validate languages
        supported_languages = tokenizer.lang_code_to_id.keys()
        if source_language not in supported_languages:
            raise HTTPException(400, f"Unsupported source language: {source_language}")
        if target_language not in supported_languages:
            raise HTTPException(400, f"Unsupported target language: {target_language}")

        tokenizer.src_lang = source_language
        encoded_inputs = tokenizer(text, return_tensors="pt")
        generated_tokens = translation_model.generate(
            **encoded_inputs, forced_bos_token_id=tokenizer.get_lang_id(target_language)
        )
        translated_text = tokenizer.decode(
            generated_tokens[0], skip_special_tokens=True
        )

        save_file(translated_text, file_path, file_type, output_path)

        return FileResponse(output_path, filename=output_filename)

    except HTTPException:
        # Re-raise validation errors instead of masking them as 500s
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Translation failed: {e}")
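
For reference, a minimal client-side sketch of how these endpoints might be exercised once the app is running. This assumes the backend is started from the backend/ directory with `uvicorn main:app` and listens on http://localhost:8000; the file names below (report.pdf, diagram.png) are hypothetical placeholders, not files shipped with this commit.

# Hypothetical usage sketch -- assumes a local server at http://localhost:8000
# and that report.pdf / diagram.png exist in the working directory.
import requests

BASE = "http://localhost:8000"

# Summarize a document via /docsum_imginter
with open("report.pdf", "rb") as fh:
    resp = requests.post(
        f"{BASE}/docsum_imginter",
        files={"file": ("report.pdf", fh, "application/pdf")},
        data={"task": "summarize"},
    )
resp.raise_for_status()
with open("summarized_report.pdf", "wb") as out:
    out.write(resp.content)  # the endpoint streams back the processed file

# Ask a question about an image via /ask
with open("diagram.png", "rb") as fh:
    resp = requests.post(
        f"{BASE}/ask",
        files={"file": ("diagram.png", fh, "image/png")},
        data={"question": "What does the diagram show?"},
    )
print(resp.json()["answer"])
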
backend/utils.py
ADDED
@@ -0,0 +1,96 @@
from pptx import Presentation
import pdfplumber
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from io import BytesIO
import docx
from pathlib import Path
import openpyxl


def extract_text(file_path: Path, file_type: str) -> str:
    text = ""

    if file_type == "txt":
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()

    elif file_type == "docx":
        doc = docx.Document(file_path)
        text = "\n".join([para.text for para in doc.paragraphs if para.text])

    elif file_type == "xlsx":
        wb = openpyxl.load_workbook(file_path)
        sheet = wb.active
        for row in sheet.rows:
            for cell in row:
                if cell.value is not None:
                    text += str(cell.value) + " "

    elif file_type == "pptx":
        prs = Presentation(file_path)
        for slide in prs.slides:
            for shape in slide.shapes:
                if shape.has_text_frame:
                    for paragraph in shape.text_frame.paragraphs:
                        if (clean_text := paragraph.text.strip()):
                            text += clean_text + "\n"

                elif shape.has_table:
                    for row in shape.table.rows:
                        for cell in row.cells:
                            if (cell_text := cell.text.strip()):
                                text += cell_text + "\n"

    elif file_type == "pdf":
        with pdfplumber.open(file_path) as pdf:
            text = "\n".join(
                page.extract_text()
                for page in pdf.pages
                if page.extract_text()
            )

    return text.strip()


def save_file(text: str, original_path: Path, file_type: str, output_path: Path):
    if file_type == "docx":
        doc = docx.Document()
        doc.add_paragraph(text)
        doc.save(output_path)

    elif file_type == "xlsx":
        wb = openpyxl.Workbook()
        sheet = wb.active
        text_lines = text.split("\n")
        for i, line in enumerate(text_lines, start=1):
            sheet.cell(row=i, column=1, value=line)
        wb.save(output_path)

    elif file_type == "pptx":
        prs = Presentation()
        slide_layout = prs.slide_layouts[1]
        slide = prs.slides.add_slide(slide_layout)
        content = slide.placeholders[1]  # body placeholder of the layout
        content.text = text
        prs.save(output_path)

    elif file_type == "pdf":
        with open(output_path, "wb") as f:
            pdf_buffer = BytesIO()
            c = canvas.Canvas(pdf_buffer, pagesize=letter)
            text_lines = text.split("\n")
            y = 750
            for line in text_lines:
                c.drawString(72, y, line)
                y -= 12
                if y < 50:
                    c.showPage()
                    y = 750
            c.save()
            f.write(pdf_buffer.getvalue())

    else:
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(text)
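
A small round-trip sketch of the two helpers above, assuming a local notes.txt exists; the file names are placeholders for illustration only.

# Hypothetical round trip: read a plain-text file, then write its contents
# back out as a .docx using the helpers defined in utils.py.
from pathlib import Path
from utils import extract_text, save_file

src = Path("notes.txt")          # assumed to exist locally
text = extract_text(src, "txt")  # returns the stripped file contents
print(f"{len(text.split())} words extracted")

# save_file picks the writer from file_type; "docx" produces a one-paragraph document
save_file(text, src, "docx", Path("notes_copy.docx"))
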