Hadiil committed on
Commit
52a1c3a
·
verified ·
1 Parent(s): 72bab89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -175
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import os
2
  from fastapi import FastAPI, UploadFile, File, HTTPException, Form
3
  from fastapi.staticfiles import StaticFiles
4
  from fastapi.responses import RedirectResponse, JSONResponse
@@ -9,8 +8,6 @@ import io
9
  from docx import Document
10
  import fitz # PyMuPDF
11
  import pandas as pd
12
- import matplotlib.pyplot as plt
13
- import seaborn as sns
14
  import uuid
15
  from transformers import MarianMTModel, MarianTokenizer
16
  from fastapi.middleware.cors import CORSMiddleware
@@ -47,136 +44,58 @@ def read_root():
47
  return RedirectResponse(url="/static/index.html")
48
 
49
  @app.post("/summarize")
50
- async def summarize_text(
51
- file: UploadFile = File(None),
52
- text: str = Form(None)
53
- ):
54
- logger.info(f"Received request: file={file}, text={text}") # Debugging
55
-
56
  if file:
57
- logger.info(f"Received document for summarization: {file.filename}")
58
- try:
59
- text = await extract_text_from_file(file)
60
- except Exception as e:
61
- logger.error(f"Error extracting text from file: {e}")
62
- raise HTTPException(status_code=400, detail=str(e))
63
- elif text:
64
- logger.info("Received manual text for summarization")
65
- else:
66
- logger.error("No file or text provided") # Debugging
67
- raise HTTPException(status_code=400, detail="No file or text provided")
68
 
69
- try:
70
- summary = text_pipeline(f"summarize: {text}", max_length=100)
71
- logger.info(f"Generated summary: {summary[0]['generated_text']}")
72
- return {"summary": summary[0]['generated_text']}
73
- except Exception as e:
74
- logger.error(f"Error during summarization: {e}")
75
- raise HTTPException(status_code=500, detail=str(e))
76
 
77
  @app.post("/caption")
78
  async def caption_image(file: UploadFile = File(...)):
79
- logger.info(f"Received image for captioning: {file.filename}")
80
- try:
81
- image_data = await file.read()
82
- image = Image.open(io.BytesIO(image_data))
83
-
84
- # Validate image format
85
- if image.format not in ["JPEG", "PNG"]:
86
- raise ValueError("Unsupported image format. Please upload a JPEG or PNG file.")
87
-
88
- caption = multimodal_pipeline(image)
89
- logger.info(f"Generated caption: {caption[0]['generated_text']}")
90
- return {"caption": caption[0]['generated_text']}
91
- except Exception as e:
92
- logger.error(f"Error during image captioning: {e}")
93
- raise HTTPException(status_code=400, detail=str(e))
94
 
95
  @app.post("/translate")
96
- async def translate_document(
97
- file: UploadFile = File(...),
98
- target_language: str = Form(...)
99
- ):
100
- logger.info(f"Received document for translation: {file.filename}")
101
- logger.info(f"Target language: {target_language}")
102
-
103
- try:
104
  text = await extract_text_from_file(file)
 
 
105
 
106
- if target_language in translation_models:
107
- model_name = translation_models[target_language]
108
- else:
109
- model_name = "Helsinki-NLP/opus-mt-en-de" # Default to German
110
-
111
- tokenizer = MarianTokenizer.from_pretrained(model_name)
112
- model = MarianMTModel.from_pretrained(model_name)
113
-
114
- translated = model.generate(**tokenizer(text, return_tensors="pt", truncation=True))
115
- translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
116
-
117
- return {"translated_text": translated_text}
118
- except Exception as e:
119
- logger.error(f"Error during document translation: {e}")
120
- raise HTTPException(status_code=500, detail=str(e))
121
 
122
  @app.post("/answer")
123
- async def answer_question(
124
- file: UploadFile = File(None),
125
- text: str = Form(None),
126
- question: str = Form(...)
127
- ):
128
  if file:
129
- logger.info(f"Received document for question answering: {file.filename}")
130
- try:
131
- text = await extract_text_from_file(file)
132
- except Exception as e:
133
- logger.error(f"Error extracting text from file: {e}")
134
- raise HTTPException(status_code=400, detail=str(e))
135
- elif text:
136
- logger.info("Received manual text for question answering")
137
- else:
138
- raise HTTPException(status_code=400, detail="No file or text provided")
139
 
140
- try:
141
- answer = text_pipeline(f"question: {question} context: {text}")
142
- logger.info(f"Generated answer: {answer[0]['generated_text']}")
143
- return {"answer": answer[0]['generated_text']}
144
- except Exception as e:
145
- logger.error(f"Error during question answering: {e}")
146
- raise HTTPException(status_code=500, detail=str(e))
147
 
148
  @app.post("/vqa")
149
  async def visual_question_answering(file: UploadFile = File(...), question: str = Form(...)):
150
- logger.info(f"Received image for visual question answering: {file.filename}")
151
- logger.info(f"Received question: {question}")
152
- try:
153
- image_data = await file.read()
154
- image = Image.open(io.BytesIO(image_data))
155
-
156
- # Validate image format
157
- if image.format not in ["JPEG", "PNG"]:
158
- raise ValueError("Unsupported image format. Please upload a JPEG or PNG file.")
159
-
160
- answer = multimodal_pipeline(image, question=question)
161
- logger.info(f"Generated answer: {answer[0]['generated_text']}")
162
- return {"answer": answer[0]['generated_text']}
163
- except Exception as e:
164
- logger.error(f"Error during visual question answering: {e}")
165
- raise HTTPException(status_code=400, detail=str(e))
166
 
167
  @app.post("/visualize")
168
- async def visualize_data(
169
- file: UploadFile = File(...),
170
- request: str = Form(...)
171
- ):
172
- logger.info(f"Received Excel file for visualization: {file.filename}")
173
- logger.info(f"Received visualization request: {request}")
174
-
175
- try:
176
- df = pd.read_excel(io.BytesIO(await file.read()))
177
-
178
- if "bar" in request.lower():
179
- code = f"""
180
  import matplotlib.pyplot as plt
181
  plt.bar(df['{df.columns[0]}'], df['{df.columns[1]}'])
182
  plt.xlabel('{df.columns[0]}')
@@ -184,8 +103,8 @@ plt.ylabel('{df.columns[1]}')
184
  plt.title('Bar Chart')
185
  plt.show()
186
  """
187
- elif "line" in request.lower():
188
- code = f"""
189
  import matplotlib.pyplot as plt
190
  plt.plot(df['{df.columns[0]}'], df['{df.columns[1]}'])
191
  plt.xlabel('{df.columns[0]}')
@@ -193,70 +112,29 @@ plt.ylabel('{df.columns[1]}')
193
  plt.title('Line Chart')
194
  plt.show()
195
  """
196
- else:
197
- code = f"""
198
  import seaborn as sns
199
  sns.pairplot(df)
200
  plt.show()
201
  """
202
-
203
- code_filename = f"visualization_{uuid.uuid4()}.py"
204
- with open(code_filename, "w") as f:
205
- f.write(code)
206
-
207
- return {"code": code, "filename": code_filename}
208
- except Exception as e:
209
- logger.error(f"Error during visualization code generation: {e}")
210
- raise HTTPException(status_code=500, detail=str(e))
211
 
212
  async def extract_text_from_file(file: UploadFile):
213
- try:
214
- file_content = await file.read()
215
- if not file_content:
216
- logger.error("Uploaded file is empty.")
217
- raise ValueError("Uploaded file is empty.")
218
-
219
- # Check file size (e.g., limit to 10MB)
220
- if len(file_content) > 10 * 1024 * 1024: # 10MB
221
- logger.error("File size exceeds the limit (10MB).")
222
- raise ValueError("File size exceeds the limit (10MB).")
223
-
224
- # Check file type
225
- if not file.filename.lower().endswith((".pdf", ".docx", ".txt")):
226
- logger.error(f"Unsupported files format: {file.filename}")
227
- raise ValueError("Unsupported file format. Please upload a PDF, DOCX, or TXT file.")
228
-
229
- if file.filename.endswith(".pdf"):
230
- try:
231
- # Log the first few bytes of the file for debugging
232
- logger.info(f"First 100 bytes of the file: {file_content[:100]}")
233
-
234
- # Attempt to open the PDF
235
- doc = fitz.open(stream=file_content, filetype="pdf")
236
- text = ""
237
- for page in doc:
238
- text += page.get_text()
239
- return text
240
- except Exception as e:
241
- logger.error(f"Error reading PDF file: {e}")
242
- raise ValueError("Failed to read PDF file. It might be corrupted or not a valid PDF.")
243
- elif file.filename.endswith(".docx"):
244
- try:
245
- doc = Document(io.BytesIO(file_content))
246
- text = "\n".join([para.text for para in doc.paragraphs])
247
- return text
248
- except Exception as e:
249
- logger.error(f"Error reading DOCX file: {e}")
250
- raise ValueError("Failed to read DOCX file. It might be corrupted or not a valid DOCX.")
251
- elif file.filename.endswith(".txt"):
252
- try:
253
- return file_content.decode("utf-8")
254
- except Exception as e:
255
- logger.error(f"Error reading TXT file: {e}")
256
- raise ValueError("Failed to read TXT file. It might be corrupted or not a valid TXT.")
257
- except Exception as e:
258
- logger.error(f"Error extracting text from file: {e}")
259
- raise HTTPException(status_code=400, detail=str(e))
260
 
261
  if __name__ == "__main__":
262
  import uvicorn
 
 
1
  from fastapi import FastAPI, UploadFile, File, HTTPException, Form
2
  from fastapi.staticfiles import StaticFiles
3
  from fastapi.responses import RedirectResponse, JSONResponse
 
8
  from docx import Document
9
  import fitz # PyMuPDF
10
  import pandas as pd
 
 
11
  import uuid
12
  from transformers import MarianMTModel, MarianTokenizer
13
  from fastapi.middleware.cors import CORSMiddleware
 
44
  return RedirectResponse(url="/static/index.html")
45
 
46
  @app.post("/summarize")
47
+ async def summarize_text(file: UploadFile = File(None), text: str = Form(None)):
 
 
 
 
 
48
  if file:
49
+ text = await extract_text_from_file(file)
50
+ elif not text:
51
+ raise HTTPException(status_code=400, detail="No text or file provided")
 
 
 
 
 
 
 
 
52
 
53
+ summary = text_pipeline(f"summarize: {text}", max_length=100)
54
+ return {"summary": summary[0]['generated_text']}
 
 
 
 
 
55
 
56
  @app.post("/caption")
57
  async def caption_image(file: UploadFile = File(...)):
58
+ image_data = await file.read()
59
+ image = Image.open(io.BytesIO(image_data))
60
+ caption = multimodal_pipeline(image)
61
+ return {"caption": caption[0]['generated_text']}
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  @app.post("/translate")
64
+ async def translate_document(file: UploadFile = File(None), text: str = Form(None), target_language: str = Form(...)):
65
+ if file:
 
 
 
 
 
 
66
  text = await extract_text_from_file(file)
67
+ elif not text:
68
+ raise HTTPException(status_code=400, detail="No text or file provided")
69
 
70
+ model_name = translation_models.get(target_language, "Helsinki-NLP/opus-mt-en-de")
71
+ tokenizer = MarianTokenizer.from_pretrained(model_name)
72
+ model = MarianMTModel.from_pretrained(model_name)
73
+ translated = model.generate(**tokenizer(text, return_tensors="pt", truncation=True))
74
+ translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
75
+ return {"translated_text": translated_text}
 
 
 
 
 
 
 
 
 
76
 
77
  @app.post("/answer")
78
+ async def answer_question(file: UploadFile = File(None), text: str = Form(None), question: str = Form(...)):
 
 
 
 
79
  if file:
80
+ text = await extract_text_from_file(file)
81
+ elif not text:
82
+ raise HTTPException(status_code=400, detail="No text or file provided")
 
 
 
 
 
 
 
83
 
84
+ answer = text_pipeline(f"question: {question} context: {text}")
85
+ return {"answer": answer[0]['generated_text']}
 
 
 
 
 
86
 
87
  @app.post("/vqa")
88
  async def visual_question_answering(file: UploadFile = File(...), question: str = Form(...)):
89
+ image_data = await file.read()
90
+ image = Image.open(io.BytesIO(image_data))
91
+ answer = multimodal_pipeline(image, question=question)
92
+ return {"answer": answer[0]['generated_text']}
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  @app.post("/visualize")
95
+ async def visualize_data(file: UploadFile = File(...), request: str = Form(...)):
96
+ df = pd.read_excel(io.BytesIO(await file.read()))
97
+ if "bar" in request.lower():
98
+ code = f"""
 
 
 
 
 
 
 
 
99
  import matplotlib.pyplot as plt
100
  plt.bar(df['{df.columns[0]}'], df['{df.columns[1]}'])
101
  plt.xlabel('{df.columns[0]}')
 
103
  plt.title('Bar Chart')
104
  plt.show()
105
  """
106
+ elif "line" in request.lower():
107
+ code = f"""
108
  import matplotlib.pyplot as plt
109
  plt.plot(df['{df.columns[0]}'], df['{df.columns[1]}'])
110
  plt.xlabel('{df.columns[0]}')
 
112
  plt.title('Line Chart')
113
  plt.show()
114
  """
115
+ else:
116
+ code = f"""
117
  import seaborn as sns
118
  sns.pairplot(df)
119
  plt.show()
120
  """
121
+ return {"code": code}
 
 
 
 
 
 
 
 
122
 
123
  async def extract_text_from_file(file: UploadFile):
124
+ file_content = await file.read()
125
+ if file.filename.endswith(".pdf"):
126
+ doc = fitz.open(stream=file_content, filetype="pdf")
127
+ text = ""
128
+ for page in doc:
129
+ text += page.get_text()
130
+ return text
131
+ elif file.filename.endswith(".docx"):
132
+ doc = Document(io.BytesIO(file_content))
133
+ return "\n".join([para.text for para in doc.paragraphs])
134
+ elif file.filename.endswith(".txt"):
135
+ return file_content.decode("utf-8")
136
+ else:
137
+ raise HTTPException(status_code=400, detail="Unsupported file format")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
  if __name__ == "__main__":
140
  import uvicorn