Spaces:

omgy
/

verolabz

Sleeping

App Files Files Community

omgy committed on Oct 23, 2025

Commit

b3ba3c5

verified ·

1 Parent(s): 3795886

Update app.py

Browse files

Files changed (1) hide show

app.py +108 -103

app.py CHANGED Viewed

@@ -11,10 +11,10 @@ import requests
 app = FastAPI()
-# Add CORS middleware to allow frontend to call the API
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],  # In production, specify your frontend domain
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
@@ -22,6 +22,7 @@ app.add_middleware(
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 @app.get("/")
 async def root():
     return {
@@ -29,138 +30,142 @@ async def root():
         "message": "Document Processing API (Gemini-2.0-Flash only)",
         "endpoints": {
             "POST /process-document": "Processes a document using Gemini-2.0-Flash model"
-        }
     }
 def clean_ai_response(text: str) -> str:
     """
-    Removes conversational phrases from AI responses.
-    Keeps only the actual document content.
     """
-    lines = text.strip().split('\n')
     cleaned_lines = []
-    skip_count = 0
     for i, line in enumerate(lines):
         line_stripped = line.strip().lower()
-        # Skip first few lines if they contain conversational phrases
         if i < 3 and len(line_stripped) < 100:
-            # Check for conversational patterns
-            conversational_keywords = [
-                'sure', 'okay', 'certainly', 'here is', "here's",
-                'this is', 'i have', "i've", 'enhanced', 'expanded',
-                'improved', 'revised', 'version', 'let me', 'below is'
-            ]
-            if any(keyword in line_stripped for keyword in conversational_keywords):
-                skip_count += 1
                 continue
-        # Keep everything else
         cleaned_lines.append(line)
-    return '\n'.join(cleaned_lines).strip()
-@app.post("/process-document")
-async def process_document(
-    file: UploadFile = File(...),
-    user_prompt: str = Form(...)
-):
-    try:
-        # Read uploaded file
-        content = await file.read()
-        filename = file.filename.lower()
-        # Extract text based on file type
-        if filename.endswith('.docx'):
-            text = extract_text_from_docx(content)
-        elif filename.endswith('.txt'):
-            text = content.decode('utf-8', errors='ignore')
-        elif filename.endswith('.pdf'):
-            text = extract_text_from_pdf(content)
-        else:
-            # Try to decode as text first
-            try:
-                text = content.decode('utf-8', errors='ignore')
-            except:
-                return JSONResponse(
-                    {"error": f"Unsupported file type: {file.filename}. Please upload .docx, .txt, or .pdf files."},
-                    status_code=400
-                )
-        if not text.strip():
-            return JSONResponse({"error": "No text content found in the document"}, status_code=400)
-        # Process with Gemini only (hardcoded)
-        result_text = call_gemini_api(text, user_prompt)
-        if not result_text:
-            return JSONResponse({"error": "Empty response from Gemini"}, status_code=500)
-        # Clean conversational text from AI response
-        cleaned_text = clean_ai_response(result_text)
-        # Build output docx
-        output = create_docx_with_layout(cleaned_text)
-        return StreamingResponse(
-            output,
-            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-            headers={"Content-Disposition": f"attachment; filename=enhanced_{file.filename.replace('.txt', '.docx').replace('.pdf', '.docx')}"}
-        )
-    except Exception as e:
-        error_msg = str(e)
-        print("ERROR:", error_msg)
-        traceback.print_exc()
-        return JSONResponse({"error": error_msg}, status_code=500)
 def extract_text_from_docx(content: bytes) -> str:
-    """Extracts plain text from uploaded DOCX"""
-    try:
-        doc = Document(BytesIO(content))
-        return "\n\n".join([p.text for p in doc.paragraphs if p.text.strip()])
-    except Exception as e:
-        raise ValueError(f"Failed to extract text from DOCX: {str(e)}")
 def extract_text_from_pdf(content: bytes) -> str:
-    """Extracts text from PDF - basic implementation"""
     try:
-        # Try using PyPDF2 if available
         import PyPDF2
         pdf_file = BytesIO(content)
         pdf_reader = PyPDF2.PdfReader(pdf_file)
-        text = ""
-        for page in pdf_reader.pages:
-            text += page.extract_text() + "\n\n"
-        return text
     except ImportError:
-        raise ValueError("PDF processing not available. Please install PyPDF2 or upload a .docx/.txt file.")
-    except Exception as e:
-        raise ValueError(f"Failed to extract text from PDF: {str(e)}")
-def call_gemini_api(text, prompt):
-    """Calls Gemini-2.0-Flash model"""
     if not GEMINI_API_KEY:
         raise ValueError("GEMINI_API_KEY not set")
     url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GEMINI_API_KEY}"
     headers = {"Content-Type": "application/json"}
     payload = {
-        "contents": [{
-            "parts": [{
-                "text": f"{prompt}\n\n{text}"
-            }]
-        }]
     }
     res = requests.post(url, headers=headers, json=payload)
     if res.status_code != 200:
         raise Exception(f"Gemini API error: {res.text}")
     data = res.json()
     try:
         return data["candidates"][0]["content"]["parts"][0]["text"]
     except (KeyError, IndexError):
-        raise Exception(f"Unexpected Gemini API response structure: {data}")

 app = FastAPI()
+# ──────────────────────────────── CORS ─────────────────────────────────
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"],  # in production, restrict to frontend domain
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+# ──────────────────────────────── ROOT ─────────────────────────────────
 @app.get("/")
 async def root():
     return {
         "message": "Document Processing API (Gemini-2.0-Flash only)",
         "endpoints": {
             "POST /process-document": "Processes a document using Gemini-2.0-Flash model"
+        },
     }
+# ─────────────────────────────── HELPERS ───────────────────────────────
 def clean_ai_response(text: str) -> str:
     """
+    Removes conversational phrases and keeps only the document content.
     """
+    lines = text.strip().split("\n")
     cleaned_lines = []
     for i, line in enumerate(lines):
         line_stripped = line.strip().lower()
         if i < 3 and len(line_stripped) < 100:
+            if any(
+                kw in line_stripped
+                for kw in [
+                    "sure",
+                    "okay",
+                    "certainly",
+                    "here is",
+                    "here's",
+                    "enhanced",
+                    "revised",
+                    "version",
+                    "below is",
+                ]
+            ):
                 continue
         cleaned_lines.append(line)
+    return "\n".join(cleaned_lines).strip()
+# ──────────────────────────── EXTRACTORS ───────────────────────────────
 def extract_text_from_docx(content: bytes) -> str:
+    """Extracts plain text from uploaded DOCX."""
+    doc = Document(BytesIO(content))
+    return "\n\n".join([p.text for p in doc.paragraphs if p.text.strip()])
 def extract_text_from_pdf(content: bytes) -> str:
+    """Extracts text from PDF - basic implementation."""
     try:
         import PyPDF2
         pdf_file = BytesIO(content)
         pdf_reader = PyPDF2.PdfReader(pdf_file)
+        return "\n\n".join([page.extract_text() for page in pdf_reader.pages])
     except ImportError:
+        raise ValueError(
+            "PDF processing not available. Please install PyPDF2 or upload .docx/.txt files."
+        )
+# ───────────────────────────── GEMINI CALL ─────────────────────────────
+def call_gemini_api(text: str, user_prompt: str) -> str:
+    """
+    Calls Gemini-2.0-Flash model with both a system instruction and user prompt.
+    """
     if not GEMINI_API_KEY:
         raise ValueError("GEMINI_API_KEY not set")
     url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GEMINI_API_KEY}"
     headers = {"Content-Type": "application/json"}
+    # System instruction — controls Gemini’s behavior (acts like a system prompt)
+    system_instruction = (
+        "You are Verolabz, a document enhancement AI. "
+        "Your goal is to enhance and refine the document text while keeping its meaning "
+        "and layout intact. Do not add explanations, introductions, or summaries. "
+        "Return only the improved text with preserved structure and formatting cues."
+    )
+    # Combine instructions and text
     payload = {
+        "system_instruction": {"parts": [{"text": system_instruction}]},
+        "contents": [
+            {
+                "role": "user",
+                "parts": [
+                    {
+                        "text": (
+                            f"User instructions: {user_prompt}\n\n"
+                            f"Document text:\n{text}"
+                        )
+                    }
+                ],
+            }
+        ],
     }
     res = requests.post(url, headers=headers, json=payload)
     if res.status_code != 200:
         raise Exception(f"Gemini API error: {res.text}")
     data = res.json()
     try:
         return data["candidates"][0]["content"]["parts"][0]["text"]
     except (KeyError, IndexError):
+        raise Exception(f"Unexpected Gemini API response: {data}")
+# ────────────────────────────── MAIN ROUTE ─────────────────────────────
+@app.post("/process-document")
+async def process_document(
+    file: UploadFile = File(...),
+    user_prompt: str = Form(...)
+):
+    try:
+        content = await file.read()
+        filename = file.filename.lower()
+        # extract text
+        if filename.endswith(".docx"):
+            text = extract_text_from_docx(content)
+        elif filename.endswith(".pdf"):
+            text = extract_text_from_pdf(content)
+        elif filename.endswith(".txt"):
+            text = content.decode("utf-8", errors="ignore")
+        else:
+            return JSONResponse(
+                {"error": "Unsupported file type. Use .docx, .pdf, or .txt"},
+                status_code=400,
+            )
+        if not text.strip():
+            return JSONResponse({"error": "Document is empty"}, status_code=400)
+        # Call Gemini
+        result_text = call_gemini_api(text, user_prompt)
+        cleaned_text = clean_ai_response(result_text)
+        # Rebuild output DOCX
+        output = create_docx_with_layout(cleaned_text)
+        return StreamingResponse(
+            output,
+            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            headers={
+                "Content-Disposition": f"attachment; filename=enhanced_{file.filename.replace('.pdf','.docx').replace('.txt','.docx')}"
+            },
+        )
+    except Exception as e:
+        traceback.print_exc()
+        return JSONResponse({"error": str(e)}, status_code=500)