omgy committed on
Commit
b3ba3c5
·
verified ·
1 Parent(s): 3795886

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -103
app.py CHANGED
@@ -11,10 +11,10 @@ import requests
11
 
12
  app = FastAPI()
13
 
14
- # Add CORS middleware to allow frontend to call the API
15
  app.add_middleware(
16
  CORSMiddleware,
17
- allow_origins=["*"], # In production, specify your frontend domain
18
  allow_credentials=True,
19
  allow_methods=["*"],
20
  allow_headers=["*"],
@@ -22,6 +22,7 @@ app.add_middleware(
22
 
23
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
24
 
 
25
  @app.get("/")
26
  async def root():
27
  return {
@@ -29,138 +30,142 @@ async def root():
29
  "message": "Document Processing API (Gemini-2.0-Flash only)",
30
  "endpoints": {
31
  "POST /process-document": "Processes a document using Gemini-2.0-Flash model"
32
- }
33
  }
34
 
 
35
  def clean_ai_response(text: str) -> str:
36
  """
37
- Removes conversational phrases from AI responses.
38
- Keeps only the actual document content.
39
  """
40
- lines = text.strip().split('\n')
41
  cleaned_lines = []
42
- skip_count = 0
43
-
44
  for i, line in enumerate(lines):
45
  line_stripped = line.strip().lower()
46
-
47
- # Skip first few lines if they contain conversational phrases
48
  if i < 3 and len(line_stripped) < 100:
49
- # Check for conversational patterns
50
- conversational_keywords = [
51
- 'sure', 'okay', 'certainly', 'here is', "here's",
52
- 'this is', 'i have', "i've", 'enhanced', 'expanded',
53
- 'improved', 'revised', 'version', 'let me', 'below is'
54
- ]
55
-
56
- if any(keyword in line_stripped for keyword in conversational_keywords):
57
- skip_count += 1
 
 
 
 
 
58
  continue
59
-
60
- # Keep everything else
61
  cleaned_lines.append(line)
62
-
63
- return '\n'.join(cleaned_lines).strip()
64
-
65
- @app.post("/process-document")
66
- async def process_document(
67
- file: UploadFile = File(...),
68
- user_prompt: str = Form(...)
69
- ):
70
- try:
71
- # Read uploaded file
72
- content = await file.read()
73
- filename = file.filename.lower()
74
-
75
- # Extract text based on file type
76
- if filename.endswith('.docx'):
77
- text = extract_text_from_docx(content)
78
- elif filename.endswith('.txt'):
79
- text = content.decode('utf-8', errors='ignore')
80
- elif filename.endswith('.pdf'):
81
- text = extract_text_from_pdf(content)
82
- else:
83
- # Try to decode as text first
84
- try:
85
- text = content.decode('utf-8', errors='ignore')
86
- except:
87
- return JSONResponse(
88
- {"error": f"Unsupported file type: {file.filename}. Please upload .docx, .txt, or .pdf files."},
89
- status_code=400
90
- )
91
-
92
- if not text.strip():
93
- return JSONResponse({"error": "No text content found in the document"}, status_code=400)
94
-
95
- # Process with Gemini only (hardcoded)
96
- result_text = call_gemini_api(text, user_prompt)
97
-
98
- if not result_text:
99
- return JSONResponse({"error": "Empty response from Gemini"}, status_code=500)
100
-
101
- # Clean conversational text from AI response
102
- cleaned_text = clean_ai_response(result_text)
103
-
104
- # Build output docx
105
- output = create_docx_with_layout(cleaned_text)
106
- return StreamingResponse(
107
- output,
108
- media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
109
- headers={"Content-Disposition": f"attachment; filename=enhanced_{file.filename.replace('.txt', '.docx').replace('.pdf', '.docx')}"}
110
- )
111
- except Exception as e:
112
- error_msg = str(e)
113
- print("ERROR:", error_msg)
114
- traceback.print_exc()
115
- return JSONResponse({"error": error_msg}, status_code=500)
116
 
 
117
  def extract_text_from_docx(content: bytes) -> str:
118
- """Extracts plain text from uploaded DOCX"""
119
- try:
120
- doc = Document(BytesIO(content))
121
- return "\n\n".join([p.text for p in doc.paragraphs if p.text.strip()])
122
- except Exception as e:
123
- raise ValueError(f"Failed to extract text from DOCX: {str(e)}")
124
 
125
  def extract_text_from_pdf(content: bytes) -> str:
126
- """Extracts text from PDF - basic implementation"""
127
  try:
128
- # Try using PyPDF2 if available
129
  import PyPDF2
 
130
  pdf_file = BytesIO(content)
131
  pdf_reader = PyPDF2.PdfReader(pdf_file)
132
- text = ""
133
- for page in pdf_reader.pages:
134
- text += page.extract_text() + "\n\n"
135
- return text
136
  except ImportError:
137
- raise ValueError("PDF processing not available. Please install PyPDF2 or upload a .docx/.txt file.")
138
- except Exception as e:
139
- raise ValueError(f"Failed to extract text from PDF: {str(e)}")
140
 
141
- def call_gemini_api(text, prompt):
142
- """Calls Gemini-2.0-Flash model"""
 
 
 
143
  if not GEMINI_API_KEY:
144
  raise ValueError("GEMINI_API_KEY not set")
145
-
146
  url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GEMINI_API_KEY}"
147
  headers = {"Content-Type": "application/json"}
 
 
 
 
 
 
 
 
 
 
148
  payload = {
149
- "contents": [{
150
- "parts": [{
151
- "text": f"{prompt}\n\n{text}"
152
- }]
153
- }]
 
 
 
 
 
 
 
 
 
154
  }
155
-
156
  res = requests.post(url, headers=headers, json=payload)
157
-
158
  if res.status_code != 200:
159
  raise Exception(f"Gemini API error: {res.text}")
160
-
161
  data = res.json()
162
-
163
  try:
164
  return data["candidates"][0]["content"]["parts"][0]["text"]
165
  except (KeyError, IndexError):
166
- raise Exception(f"Unexpected Gemini API response structure: {data}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  app = FastAPI()
13
 
14
+ # ──────────────────────────────── CORS ─────────────────────────────────
15
  app.add_middleware(
16
  CORSMiddleware,
17
+ allow_origins=["*"], # in production, restrict to frontend domain
18
  allow_credentials=True,
19
  allow_methods=["*"],
20
  allow_headers=["*"],
 
22
 
23
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
24
 
25
+ # ──────────────────────────────── ROOT ─────────────────────────────────
26
  @app.get("/")
27
  async def root():
28
  return {
 
30
  "message": "Document Processing API (Gemini-2.0-Flash only)",
31
  "endpoints": {
32
  "POST /process-document": "Processes a document using Gemini-2.0-Flash model"
33
+ },
34
  }
35
 
36
+ # ─────────────────────────────── HELPERS ───────────────────────────────
37
  def clean_ai_response(text: str) -> str:
38
  """
39
+ Removes conversational phrases and keeps only the document content.
 
40
  """
41
+ lines = text.strip().split("\n")
42
  cleaned_lines = []
 
 
43
  for i, line in enumerate(lines):
44
  line_stripped = line.strip().lower()
 
 
45
  if i < 3 and len(line_stripped) < 100:
46
+ if any(
47
+ kw in line_stripped
48
+ for kw in [
49
+ "sure",
50
+ "okay",
51
+ "certainly",
52
+ "here is",
53
+ "here's",
54
+ "enhanced",
55
+ "revised",
56
+ "version",
57
+ "below is",
58
+ ]
59
+ ):
60
  continue
 
 
61
  cleaned_lines.append(line)
62
+ return "\n".join(cleaned_lines).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
+ # ──────────────────────────── EXTRACTORS ───────────────────────────────
65
  def extract_text_from_docx(content: bytes) -> str:
66
+ """Extracts plain text from uploaded DOCX."""
67
+ doc = Document(BytesIO(content))
68
+ return "\n\n".join([p.text for p in doc.paragraphs if p.text.strip()])
 
 
 
69
 
70
  def extract_text_from_pdf(content: bytes) -> str:
71
+ """Extracts text from PDF - basic implementation."""
72
  try:
 
73
  import PyPDF2
74
+
75
  pdf_file = BytesIO(content)
76
  pdf_reader = PyPDF2.PdfReader(pdf_file)
77
+ return "\n\n".join([page.extract_text() for page in pdf_reader.pages])
 
 
 
78
  except ImportError:
79
+ raise ValueError(
80
+ "PDF processing not available. Please install PyPDF2 or upload .docx/.txt files."
81
+ )
82
 
83
+ # ───────────────────────────── GEMINI CALL ─────────────────────────────
84
+ def call_gemini_api(text: str, user_prompt: str) -> str:
85
+ """
86
+ Calls Gemini-2.0-Flash model with both a system instruction and user prompt.
87
+ """
88
  if not GEMINI_API_KEY:
89
  raise ValueError("GEMINI_API_KEY not set")
90
+
91
  url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GEMINI_API_KEY}"
92
  headers = {"Content-Type": "application/json"}
93
+
94
+ # System instruction — controls Gemini’s behavior (acts like a system prompt)
95
+ system_instruction = (
96
+ "You are Verolabz, a document enhancement AI. "
97
+ "Your goal is to enhance and refine the document text while keeping its meaning "
98
+ "and layout intact. Do not add explanations, introductions, or summaries. "
99
+ "Return only the improved text with preserved structure and formatting cues."
100
+ )
101
+
102
+ # Combine instructions and text
103
  payload = {
104
+ "system_instruction": {"parts": [{"text": system_instruction}]},
105
+ "contents": [
106
+ {
107
+ "role": "user",
108
+ "parts": [
109
+ {
110
+ "text": (
111
+ f"User instructions: {user_prompt}\n\n"
112
+ f"Document text:\n{text}"
113
+ )
114
+ }
115
+ ],
116
+ }
117
+ ],
118
  }
119
+
120
  res = requests.post(url, headers=headers, json=payload)
 
121
  if res.status_code != 200:
122
  raise Exception(f"Gemini API error: {res.text}")
123
+
124
  data = res.json()
 
125
  try:
126
  return data["candidates"][0]["content"]["parts"][0]["text"]
127
  except (KeyError, IndexError):
128
+ raise Exception(f"Unexpected Gemini API response: {data}")
129
+
130
+ # ────────────────────────────── MAIN ROUTE ─────────────────────────────
131
+ @app.post("/process-document")
132
+ async def process_document(
133
+ file: UploadFile = File(...),
134
+ user_prompt: str = Form(...)
135
+ ):
136
+ try:
137
+ content = await file.read()
138
+ filename = file.filename.lower()
139
+
140
+ # extract text
141
+ if filename.endswith(".docx"):
142
+ text = extract_text_from_docx(content)
143
+ elif filename.endswith(".pdf"):
144
+ text = extract_text_from_pdf(content)
145
+ elif filename.endswith(".txt"):
146
+ text = content.decode("utf-8", errors="ignore")
147
+ else:
148
+ return JSONResponse(
149
+ {"error": "Unsupported file type. Use .docx, .pdf, or .txt"},
150
+ status_code=400,
151
+ )
152
+
153
+ if not text.strip():
154
+ return JSONResponse({"error": "Document is empty"}, status_code=400)
155
+
156
+ # Call Gemini
157
+ result_text = call_gemini_api(text, user_prompt)
158
+ cleaned_text = clean_ai_response(result_text)
159
+
160
+ # Rebuild output DOCX
161
+ output = create_docx_with_layout(cleaned_text)
162
+ return StreamingResponse(
163
+ output,
164
+ media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
165
+ headers={
166
+ "Content-Disposition": f"attachment; filename=enhanced_{file.filename.replace('.pdf','.docx').replace('.txt','.docx')}"
167
+ },
168
+ )
169
+ except Exception as e:
170
+ traceback.print_exc()
171
+ return JSONResponse({"error": str(e)}, status_code=500)