Spaces:

Tech-di
/

WallTD-v.1

Sleeping

App Files Files Community

Feriel080 committed on Apr 3, 2025

Commit

465d22f

verified ·

1 Parent(s): a0c5ef2

Upload main.py

Browse files

Files changed (1) hide show

backend/main.py +2 -139

backend/main.py CHANGED Viewed

@@ -3,50 +3,13 @@ from fastapi.responses import FileResponse
 from fastapi.staticfiles import StaticFiles
 import shutil
 from pathlib import Path
-from transformers import (
-    pipeline,
-    AutoProcessor,
-    AutoModelForVision2Seq,
-    # M2M100ForConditionalGeneration,
-    # M2M100Tokenizer,
-    # AutoConfig
-)
-# from huggingface_hub import InferenceClient
-from PIL import Image
-# import matplotlib.pyplot as plt
-# import seaborn as sns
-# import numpy as np
 from utils import extract_text, save_file
-import torch
-# import easyocr
-# from langdetect import detect, DetectorFactory  # for language detection
 app = FastAPI()
 # Initialize Hugging Face models
 summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-processor = AutoProcessor.from_pretrained("microsoft/kosmos-2-patch14-224")
-image_captioner = AutoModelForVision2Seq.from_pretrained(
-    "microsoft/kosmos-2-patch14-224",
-    use_safetensors=True,
-    trust_remote_code=True,
-    torch_dtype=torch.float16,
-)
-device = "cuda" if torch.cuda.is_available() else "cpu"
-image_captioner = image_captioner.to(device)
-# config = AutoConfig.from_pretrained("microsoft/kosmos-2-patch14-224", trust_remote_code=True)
-# image_captioner = AutoModelForVision2Seq.from_config(config, trust_remote_code=True)
-# tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
-# translation_model = M2M100ForConditionalGeneration.from_pretrained(
-#     "facebook/m2m100_418M"
-# )
-# question_answering = pipeline(
-#     "question-answering", model="bert-large-uncased-whole-word-masking-finetuned-squad"
-# )
-# DetectorFactory.seed = 0
 # Directory to store uploaded and processed files
 UPLOAD_DIR = Path("uploads")
@@ -129,31 +92,7 @@ async def docsum_imginter(file: UploadFile = File(...), task: str = Form(...)):
                 detail="Task not supported for documents. Use 'summarize'.",
             )
     elif file_type in ["png", "jpg", "jpeg"]:
-        if task.lower() == "interpretation":
-            image = Image.open(file_path)
-            inputs = processor(
-                text="Describe this image in detail including any text",
-                images=image,
-                return_tensors="pt",
-            ).to(device)
-            generated_ids = image_captioner.generate(
-                pixel_values=inputs["pixel_values"],
-                input_ids=inputs["input_ids"],
-                attention_mask=inputs["attention_mask"],
-                max_new_tokens=200,
-                image_embeds=None,
-                image_embeds_position_mask=inputs["image_embeds_position_mask"],
-                use_cache=True,
-            )
-            caption = processor.decode(generated_ids, skip_special_tokens=True)[0]
-            return {"caption": caption}
-        else:
-            raise HTTPException(
-                status_code=400,
-                detail="Task not supported for images. Use 'interpretation'.",
-            )
     else:
         raise HTTPException(status_code=400, detail="Unsupported file type.")
@@ -161,32 +100,6 @@ async def docsum_imginter(file: UploadFile = File(...), task: str = Form(...)):
 # Intelligent Question Answering (Placeholder)
 @app.post("/ask")
 async def ask(file: UploadFile = File(...), question: str = Form(...)):
-    # file_type = file.filename.split(".")[-1].lower()
-    # file_path = UPLOAD_DIR / file.filename
-    # reader = easyocr.Reader(["en"])
-    # with open(file_path, "wb") as f:
-    #     shutil.copyfileobj(file.file, f)
-    # if file_type in ["docx", "xlsx", "pptx", "pdf", "txt"]:
-    #     text = extract_text(file_path, file_type)
-    # elif file_type in ["png", "jpg", "jpeg"]:
-    #     with Image.open(file.file) as image:
-    #         text = reader.readtext(image)
-    # else:
-    #     raise HTTPException(status_code=400, detail="Unsupported file type.")
-    # if not text:
-    #     raise HTTPException(
-    #         status_code=400,
-    #         detail="The File doesn't contain any text.",
-    #     )
-    # else:
-    #     result = question_answering(question=question, context=text)
-    #     return {"answer": result["answer"]}
     return {"message": "Not implemented yet."}
@@ -199,54 +112,4 @@ async def visualization(file: UploadFile = File(...), request: str = Form(...)):
 # Text Translation
 @app.post("/translate")
 async def translate(file: UploadFile = File(...), target_language: str = Form(...)):
-    # file_type = file.filename.split(".")[-1].lower()
-    # file_path = UPLOAD_DIR / file.filename
-    # output_filename = f"translated_{file.filename}"
-    # output_path = PROCESSED_DIR / output_filename
-    # with open(file_path, "wb") as f:
-    #     shutil.copyfileobj(file.file, f)
-    # try:
-    #     text = extract_text(file_path, file_type)
-    #     # Auto-detect source language if not provided
-    #     source_language = detect(text[:1000])  # Check first 1000 chars
-    #     # Convert to M2M100 language codes
-    #     source_language = {
-    #         "en": "en",
-    #         "fr": "fr",
-    #         "es": "es",
-    #         "de": "de",
-    #         "ar": "ar",
-    #         "zh": "zh",
-    #         "ja": "ja",
-    #         "ru": "ru",
-    #     }.get(source_language, source_language)
-    #     # Validate languages
-    #     supported_languages = tokenizer.lang_code_to_id.keys()
-    #     if source_language not in supported_languages:
-    #         raise HTTPException(400, f"Unsupported source language: {source_language}")
-    #     if target_language not in supported_languages:
-    #         raise HTTPException(400, f"Unsupported target language: {target_language}")
-    #     tokenizer.src_lang = source_language
-    #     encoded_inputs = tokenizer(text, return_tensors="pt")
-    #     generated_tokens = translation_model.generate(
-    #         **encoded_inputs, forced_bos_token_id=tokenizer.get_lang_id(target_language)
-    #     )
-    #     translated_text = tokenizer.decode(
-    #         generated_tokens[0], skip_special_tokens=True
-    #     )
-    #     save_file(translated_text, file_path, file_type, output_path)
-    #     return FileResponse(output_path, filename=output_filename)
-    # except Exception as e:
-    #     raise HTTPException(
-    #         status_code=500, detail="Task not supported. Use 'translate to [language]'."
-    #     )
     return {"message": "Not implemented yet."}

 from fastapi.staticfiles import StaticFiles
 import shutil
 from pathlib import Path
+from transformers import pipeline
 from utils import extract_text, save_file
 app = FastAPI()
 # Initialize Hugging Face models
 summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 # Directory to store uploaded and processed files
 UPLOAD_DIR = Path("uploads")
                 detail="Task not supported for documents. Use 'summarize'.",
             )
     elif file_type in ["png", "jpg", "jpeg"]:
+        return {"message": "Not implemented yet."}
     else:
         raise HTTPException(status_code=400, detail="Unsupported file type.")
 # Intelligent Question Answering (Placeholder)
 @app.post("/ask")
 async def ask(file: UploadFile = File(...), question: str = Form(...)):
     """Placeholder handler for POST /ask.

     Declares a required uploaded file and a required ``question`` form
     field, but reads neither of them: every call returns the same fixed
     "not implemented" message.
     """
     return {"message": "Not implemented yet."}
 # Text Translation (Placeholder)
 @app.post("/translate")
 async def translate(file: UploadFile = File(...), target_language: str = Form(...)):
     """Placeholder handler for POST /translate.

     Declares a required uploaded file and a required ``target_language``
     form field, but reads neither of them: every call returns the same
     fixed "not implemented" message.
     """
     return {"message": "Not implemented yet."}