Spaces:

YchKhan
/

Ptt_Endpoints

Sleeping

App Files Files Community

YchKhan committed on Mar 21, 2025

Commit

e7d3262

verified ·

1 Parent(s): 6d5a0f5

Update app.py

Browse files

Files changed (1) hide show

app.py +157 -4

app.py CHANGED Viewed

@@ -1,7 +1,160 @@
-from fastapi import FastAPI
-app = FastAPI()
 @app.get("/")
-def greet_json():
-    return {"Hello": "World!"}

+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
+from typing import List, Dict, Any, Optional
+import json
+import requests
+from bs4 import BeautifulSoup
+import fitz  # PyMuPDF
+import urllib3
+import pandas as pd
+import io
+from duckduckgo_search import DDGS
+# Application instance; the title and description are passed to the FastAPI constructor.
+app = FastAPI(title="Patent Analyzer API", description="API for patent search and analysis")
+# Enable CORS for frontend
+# NOTE(review): every origin, method and header is allowed here while
+# allow_credentials is True. This is a very permissive policy; restrict
+# allow_origins to the real frontend domain(s) before production use.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # In production, specify your frontend domain
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Define data models
+# Request body for POST /search.
+class SearchRequest(BaseModel):
+    # Free-text search query; the /search handler rejects an empty value with a 400.
+    query: str
+# Request body for POST /analyze.
+class AnalysisRequest(BaseModel):
+    # Patent background text supplied by the client; passed through to analyze_pdf_novelty.
+    patent_background: str
+    # URL of the PDF document to download and extract text from.
+    pdf_url: str
+# Request body for POST /export-excel.
+class ExcelExportRequest(BaseModel):
+    # Table rows; the list of dicts is turned into a DataFrame and written to the 'Results' sheet.
+    tableData: List[Dict[str, Any]]
+    # Optional query text written to the 'Query' sheet; a placeholder string is used when absent.
+    userQuery: Optional[str] = None
 @app.get("/")
+async def root():
+    return {"message": "Patent Analyzer API is running"}
+@app.post("/search")
+async def search(request: SearchRequest):
+    if not request.query:
+        raise HTTPException(status_code=400, detail="No query provided")
+    try:
+        # Remove filetype:pdf if present since DDGS might handle it differently
+        clean_query = request.query.replace('filetype:pdf', '').strip()
+        results = search_web(clean_query, max_references=5)
+        return {"results": results}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error performing search: {str(e)}")
+@app.post("/analyze")
+async def analyze(request: AnalysisRequest):
+    if not request.patent_background or not request.pdf_url:
+        raise HTTPException(status_code=400, detail="Missing required parameters")
+    try:
+        result = analyze_pdf_novelty(request.patent_background, request.pdf_url)
+        return {"result": result}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error analyzing PDF: {str(e)}")
+@app.post("/export-excel")
+async def export_excel(request: ExcelExportRequest):
+    try:
+        if not request.tableData:
+            raise HTTPException(status_code=400, detail="No table data provided")
+        # Create pandas DataFrame from the data
+        df = pd.DataFrame(request.tableData)
+        # Get the user query
+        user_query = request.userQuery or 'No query provided'
+        # Create a BytesIO object to store the Excel file
+        output = io.BytesIO()
+        # Create Excel file with xlsxwriter engine
+        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
+            # Write the data to a sheet named 'Results'
+            df.to_excel(writer, sheet_name='Results', index=False)
+            # Get workbook and worksheet objects
+            workbook = writer.book
+            worksheet = writer.sheets['Results']
+            # Add a sheet for the query
+            query_sheet = workbook.add_worksheet('Query')
+            query_sheet.write(0, 0, 'Patent Query')
+            query_sheet.write(1, 0, user_query)
+            # Adjust column widths
+            for i, col in enumerate(df.columns):
+                # Get maximum column width
+                max_len = max(
+                    df[col].astype(str).map(len).max(),
+                    len(col)
+                ) + 2
+                # Set column width (limit to 100 to avoid issues)
+                worksheet.set_column(i, i, min(max_len, 100))
+        # Seek to the beginning of the BytesIO object
+        output.seek(0)
+        # Return the Excel file
+        return StreamingResponse(
+            output,
+            media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+            headers={"Content-Disposition": "attachment; filename=patent_search_results.xlsx"}
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error exporting Excel: {str(e)}")
+def search_web(topic, max_references=5):
+    """Search the web using DuckDuckGo and return results."""
+    doc_list = []
+    with DDGS(verify=False) as ddgs:
+        i = 0
+        for r in ddgs.text(topic + " filetype:pdf", region='wt-wt', safesearch='On', timelimit='n'):
+            if i >= max_references:
+                break
+            doc_list.append({"title": r['title'], "body": r['body'], "url": r['href']})
+            i += 1
+    return doc_list
+def analyze_pdf_novelty(patent_background, pdf_url):
+    """Extract first page text from PDF and evaluate novelty against patent background"""
+    try:
+        # Disable SSL warnings
+        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+        # Download PDF
+        response = requests.get(pdf_url, timeout=10, verify=False)
+        if response.status_code != 200:
+            return {"error": f"Failed to download PDF (status code: {response.status_code})"}
+        # Extract first page text
+        try:
+            pdf_document = fitz.open(stream=response.content, filetype="pdf")
+            if pdf_document.page_count == 0:
+                return {"error": "PDF has no pages"}
+            first_page = pdf_document.load_page(0)
+            text = first_page.get_text()
+            # Return the extracted text for frontend analysis with OpenAI
+            # We're not doing the analysis here as it will be done in the frontend
+            return {
+                "pdf_text": text,
+                "score": None,
+                "justification": None
+            }
+        except Exception as e:
+            return {"error": f"Error processing PDF: {str(e)}"}
+    except Exception as e:
+        return {"error": f"Error: {str(e)}"}