Feriel080 committed on
Commit
465d22f
·
verified ·
1 Parent(s): a0c5ef2

Upload main.py

Browse files
Files changed (1) hide show
  1. backend/main.py +2 -139
backend/main.py CHANGED
@@ -3,50 +3,13 @@ from fastapi.responses import FileResponse
3
  from fastapi.staticfiles import StaticFiles
4
  import shutil
5
  from pathlib import Path
6
- from transformers import (
7
- pipeline,
8
- AutoProcessor,
9
- AutoModelForVision2Seq,
10
- # M2M100ForConditionalGeneration,
11
- # M2M100Tokenizer,
12
- # AutoConfig
13
- )
14
- # from huggingface_hub import InferenceClient
15
- from PIL import Image
16
- # import matplotlib.pyplot as plt
17
- # import seaborn as sns
18
- # import numpy as np
19
  from utils import extract_text, save_file
20
- import torch
21
- # import easyocr
22
- # from langdetect import detect, DetectorFactory # for language detection
23
 
24
  app = FastAPI()
25
 
26
  # Initialize Hugging Face models
27
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
28
- processor = AutoProcessor.from_pretrained("microsoft/kosmos-2-patch14-224")
29
- image_captioner = AutoModelForVision2Seq.from_pretrained(
30
- "microsoft/kosmos-2-patch14-224",
31
- use_safetensors=True,
32
- trust_remote_code=True,
33
- torch_dtype=torch.float16,
34
- )
35
- device = "cuda" if torch.cuda.is_available() else "cpu"
36
- image_captioner = image_captioner.to(device)
37
- # config = AutoConfig.from_pretrained("microsoft/kosmos-2-patch14-224", trust_remote_code=True)
38
- # image_captioner = AutoModelForVision2Seq.from_config(config, trust_remote_code=True)
39
-
40
- # tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
41
- # translation_model = M2M100ForConditionalGeneration.from_pretrained(
42
- # "facebook/m2m100_418M"
43
- # )
44
- # question_answering = pipeline(
45
- # "question-answering", model="bert-large-uncased-whole-word-masking-finetuned-squad"
46
- # )
47
-
48
- # DetectorFactory.seed = 0
49
-
50
 
51
  # Directory to store uploaded and processed files
52
  UPLOAD_DIR = Path("uploads")
@@ -129,31 +92,7 @@ async def docsum_imginter(file: UploadFile = File(...), task: str = Form(...)):
129
  detail="Task not supported for documents. Use 'summarize'.",
130
  )
131
  elif file_type in ["png", "jpg", "jpeg"]:
132
- if task.lower() == "interpretation":
133
- image = Image.open(file_path)
134
- inputs = processor(
135
- text="Describe this image in detail including any text",
136
- images=image,
137
- return_tensors="pt",
138
- ).to(device)
139
-
140
- generated_ids = image_captioner.generate(
141
- pixel_values=inputs["pixel_values"],
142
- input_ids=inputs["input_ids"],
143
- attention_mask=inputs["attention_mask"],
144
- max_new_tokens=200,
145
- image_embeds=None,
146
- image_embeds_position_mask=inputs["image_embeds_position_mask"],
147
- use_cache=True,
148
- )
149
-
150
- caption = processor.decode(generated_ids, skip_special_tokens=True)[0]
151
- return {"caption": caption}
152
- else:
153
- raise HTTPException(
154
- status_code=400,
155
- detail="Task not supported for images. Use 'interpretation'.",
156
- )
157
  else:
158
  raise HTTPException(status_code=400, detail="Unsupported file type.")
159
 
@@ -161,32 +100,6 @@ async def docsum_imginter(file: UploadFile = File(...), task: str = Form(...)):
161
  # Intelligent Question Answering (Placeholder)
162
  @app.post("/ask")
163
  async def ask(file: UploadFile = File(...), question: str = Form(...)):
164
- # file_type = file.filename.split(".")[-1].lower()
165
- # file_path = UPLOAD_DIR / file.filename
166
- # reader = easyocr.Reader(["en"])
167
-
168
- # with open(file_path, "wb") as f:
169
- # shutil.copyfileobj(file.file, f)
170
-
171
- # if file_type in ["docx", "xlsx", "pptx", "pdf", "txt"]:
172
- # text = extract_text(file_path, file_type)
173
-
174
- # elif file_type in ["png", "jpg", "jpeg"]:
175
- # with Image.open(file.file) as image:
176
- # text = reader.readtext(image)
177
-
178
- # else:
179
- # raise HTTPException(status_code=400, detail="Unsupported file type.")
180
-
181
- # if not text:
182
- # raise HTTPException(
183
- # status_code=400,
184
- # detail="The File doesn't contain any text.",
185
- # )
186
-
187
- # else:
188
- # result = question_answering(question=question, context=text)
189
- # return {"answer": result["answer"]}
190
  return {"message": "Not implemented yet."}
191
 
192
 
@@ -199,54 +112,4 @@ async def visualization(file: UploadFile = File(...), request: str = Form(...)):
199
  # Text Translation
200
  @app.post("/translate")
201
  async def translate(file: UploadFile = File(...), target_language: str = Form(...)):
202
- # file_type = file.filename.split(".")[-1].lower()
203
- # file_path = UPLOAD_DIR / file.filename
204
- # output_filename = f"translated_{file.filename}"
205
- # output_path = PROCESSED_DIR / output_filename
206
-
207
- # with open(file_path, "wb") as f:
208
- # shutil.copyfileobj(file.file, f)
209
-
210
- # try:
211
- # text = extract_text(file_path, file_type)
212
-
213
- # # Auto-detect source language if not provided
214
-
215
- # source_language = detect(text[:1000]) # Check first 1000 chars
216
- # # Convert to M2M100 language codes
217
- # source_language = {
218
- # "en": "en",
219
- # "fr": "fr",
220
- # "es": "es",
221
- # "de": "de",
222
- # "ar": "ar",
223
- # "zh": "zh",
224
- # "ja": "ja",
225
- # "ru": "ru",
226
- # }.get(source_language, source_language)
227
-
228
- # # Validate languages
229
- # supported_languages = tokenizer.lang_code_to_id.keys()
230
- # if source_language not in supported_languages:
231
- # raise HTTPException(400, f"Unsupported source language: {source_language}")
232
- # if target_language not in supported_languages:
233
- # raise HTTPException(400, f"Unsupported target language: {target_language}")
234
-
235
- # tokenizer.src_lang = source_language
236
- # encoded_inputs = tokenizer(text, return_tensors="pt")
237
- # generated_tokens = translation_model.generate(
238
- # **encoded_inputs, forced_bos_token_id=tokenizer.get_lang_id(target_language)
239
- # )
240
- # translated_text = tokenizer.decode(
241
- # generated_tokens[0], skip_special_tokens=True
242
- # )
243
-
244
- # save_file(translated_text, file_path, file_type, output_path)
245
-
246
- # return FileResponse(output_path, filename=output_filename)
247
-
248
- # except Exception as e:
249
- # raise HTTPException(
250
- # status_code=500, detail="Task not supported. Use 'translate to [language]'."
251
- # )
252
  return {"message": "Not implemented yet."}
 
3
  from fastapi.staticfiles import StaticFiles
4
  import shutil
5
  from pathlib import Path
6
+ from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
 
7
  from utils import extract_text, save_file
 
 
 
8
 
9
  app = FastAPI()
10
 
11
  # Initialize Hugging Face models
12
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  # Directory to store uploaded and processed files
15
  UPLOAD_DIR = Path("uploads")
 
92
  detail="Task not supported for documents. Use 'summarize'.",
93
  )
94
  elif file_type in ["png", "jpg", "jpeg"]:
95
+ return {"message": "Not implemented yet."}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  else:
97
  raise HTTPException(status_code=400, detail="Unsupported file type.")
98
 
 
100
  # Intelligent Question Answering (Placeholder)
101
  @app.post("/ask")
102
  async def ask(file: UploadFile = File(...), question: str = Form(...)):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  return {"message": "Not implemented yet."}
104
 
105
 
 
112
  # Text Translation
113
  @app.post("/translate")
114
  async def translate(file: UploadFile = File(...), target_language: str = Form(...)):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  return {"message": "Not implemented yet."}