YchKhan committed on
Commit
e7d3262
·
verified ·
1 Parent(s): 6d5a0f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +157 -4
app.py CHANGED
@@ -1,7 +1,160 @@
1
- from fastapi import FastAPI
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- app = FastAPI()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  @app.get("/")
6
- def greet_json():
7
- return {"Hello": "World!"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.responses import StreamingResponse
4
+ from pydantic import BaseModel
5
+ from typing import List, Dict, Any, Optional
6
+ import json
7
+ import requests
8
+ from bs4 import BeautifulSoup
9
+ import fitz # PyMuPDF
10
+ import urllib3
11
+ import pandas as pd
12
+ import io
13
+ from duckduckgo_search import DDGS
14
 
15
# Application instance; the title and description are handed to FastAPI as
# the API's metadata.
app = FastAPI(title="Patent Analyzer API", description="API for patent search and analysis")

# Enable CORS for the frontend.
# Credentials are disabled because the FastAPI CORS documentation states that
# wildcard origins/methods/headers cannot be combined with
# allow_credentials=True. To support credentialed requests, list the frontend
# origin(s) explicitly in allow_origins and then re-enable allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify your frontend domain
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
25
+
26
+ # Define data models
27
class SearchRequest(BaseModel):
    """Request body accepted by the ``/search`` endpoint."""

    # Free-text search query sent by the client.
    query: str
29
+
30
class AnalysisRequest(BaseModel):
    """Request body accepted by the ``/analyze`` endpoint."""

    # Background text of the patent; forwarded to analyze_pdf_novelty.
    patent_background: str
    # Address of the PDF that analyze_pdf_novelty downloads.
    pdf_url: str
33
+
34
class ExcelExportRequest(BaseModel):
    """Request body accepted by the ``/export-excel`` endpoint."""

    # One dict per table row; loaded into a DataFrame for the 'Results' sheet.
    tableData: List[Dict[str, Any]]
    # Optional query text that is written to the 'Query' sheet.
    userQuery: Optional[str] = None
37
 
38
  @app.get("/")
39
async def root():
    """Return a fixed status payload showing that the API is reachable."""
    status = {"message": "Patent Analyzer API is running"}
    return status
41
+
42
@app.post("/search")
async def search(request: SearchRequest):
    """Search the web for PDF documents matching the submitted query.

    Args:
        request: Body carrying the free-text ``query``.

    Returns:
        ``{"results": [...]}`` where the list is whatever ``search_web``
        returns (at most five entries are requested).

    Raises:
        HTTPException: 400 when the query is empty once the ``filetype:pdf``
            operator and surrounding whitespace are removed; 500 when the
            search itself fails.
    """
    # Local import keeps this handler self-contained; fastapi is already a
    # dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    # search_web appends its own "filetype:pdf" operator, so strip any copy
    # the client sent before validating. Checking the cleaned value rejects
    # queries that consist only of the operator or of whitespace.
    clean_query = request.query.replace('filetype:pdf', '').strip()
    if not clean_query:
        raise HTTPException(status_code=400, detail="No query provided")

    try:
        # search_web performs blocking network I/O; run it in a worker thread
        # so the event loop can keep serving other requests.
        results = await run_in_threadpool(search_web, clean_query, max_references=5)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error performing search: {str(e)}") from e
    return {"results": results}
54
+
55
@app.post("/analyze")
async def analyze(request: AnalysisRequest):
    """Download the referenced PDF and return its first-page text.

    Args:
        request: Body carrying ``patent_background`` and ``pdf_url``.

    Returns:
        ``{"result": ...}`` where the value is the dict produced by
        ``analyze_pdf_novelty``. That helper reports download and parsing
        failures through an ``"error"`` key in the dict rather than by
        raising, so such failures still arrive here as a normal result.

    Raises:
        HTTPException: 400 when either field is empty; 500 when the helper
            raises unexpectedly.
    """
    # Local import keeps this handler self-contained; fastapi is already a
    # dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    if not request.patent_background or not request.pdf_url:
        raise HTTPException(status_code=400, detail="Missing required parameters")

    try:
        # The helper downloads and parses the PDF synchronously; run it in a
        # worker thread so the event loop is not blocked meanwhile.
        result = await run_in_threadpool(
            analyze_pdf_novelty, request.patent_background, request.pdf_url
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error analyzing PDF: {str(e)}") from e
    return {"result": result}
65
+
66
@app.post("/export-excel")
async def export_excel(request: ExcelExportRequest):
    """Build an .xlsx workbook from the submitted table and stream it back.

    The workbook has a 'Results' sheet holding the table rows and a 'Query'
    sheet holding the user's query text.

    Args:
        request: Body carrying ``tableData`` (one dict per row) and the
            optional ``userQuery``.

    Returns:
        A ``StreamingResponse`` that delivers the workbook as the attachment
        ``patent_search_results.xlsx``.

    Raises:
        HTTPException: 400 when ``tableData`` is empty; 500 when building the
            workbook fails.
    """
    # Validate outside the try block: raised inside it, this 400 would be
    # caught by the blanket handler below and reported as a 500.
    if not request.tableData:
        raise HTTPException(status_code=400, detail="No table data provided")

    try:
        # Create pandas DataFrame from the data
        df = pd.DataFrame(request.tableData)

        # Fall back to a placeholder when no query text was supplied
        user_query = request.userQuery or 'No query provided'

        # In-memory buffer that receives the Excel file
        output = io.BytesIO()

        # Create Excel file with xlsxwriter engine
        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            # Write the data to a sheet named 'Results'
            df.to_excel(writer, sheet_name='Results', index=False)

            # Get workbook and worksheet objects
            workbook = writer.book
            worksheet = writer.sheets['Results']

            # Add a sheet for the query
            query_sheet = workbook.add_worksheet('Query')
            query_sheet.write(0, 0, 'Patent Query')
            query_sheet.write(1, 0, user_query)

            # Size each column to its longest cell or its header, whichever
            # is longer, plus a little padding
            for i, col in enumerate(df.columns):
                max_len = max(
                    df[col].astype(str).map(len).max(),
                    len(col)
                ) + 2
                # Set column width (limit to 100 to avoid issues)
                worksheet.set_column(i, i, min(max_len, 100))

        # Rewind so the response streams the buffer from its first byte
        output.seek(0)

        # Return the Excel file
        return StreamingResponse(
            output,
            media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            headers={"Content-Disposition": "attachment; filename=patent_search_results.xlsx"}
        )

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error exporting Excel: {str(e)}") from e
117
+
118
def search_web(topic, max_references=5):
    """Run a DuckDuckGo text search for PDFs about ``topic``.

    The ``filetype:pdf`` operator is appended to ``topic`` before searching.

    Returns:
        A list of dicts with ``"title"``, ``"body"`` and ``"url"`` keys,
        holding no more than ``max_references`` entries.
    """
    collected = []
    with DDGS(verify=False) as client:
        hits = client.text(topic + " filetype:pdf", region='wt-wt', safesearch='On', timelimit='n')
        for position, hit in enumerate(hits):
            # Stop once the requested number of references has been gathered.
            if position >= max_references:
                break
            collected.append({"title": hit['title'], "body": hit['body'], "url": hit['href']})
    return collected
129
+
130
def analyze_pdf_novelty(patent_background, pdf_url):
    """Download a PDF and return the text of its first page.

    No novelty scoring happens here: ``score`` and ``justification`` are
    returned as ``None`` so the frontend can fill them in.

    Args:
        patent_background: Accepted for interface compatibility; this
            function does not read it.
        pdf_url: Address of the PDF to download.

    Returns:
        On success, a dict with ``"pdf_text"``, ``"score"`` and
        ``"justification"`` keys. On any failure, a dict with a single
        ``"error"`` key describing the problem. This function does not raise.
    """
    try:
        # Disable SSL warnings, since the download below skips verification
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

        # Download PDF.
        # NOTE(review): verify=False turns off TLS certificate checking and
        # pdf_url is caller-supplied; confirm both are acceptable for the
        # deployment before relying on this in production.
        response = requests.get(pdf_url, timeout=10, verify=False)
        if response.status_code != 200:
            return {"error": f"Failed to download PDF (status code: {response.status_code})"}

        # Extract first page text
        try:
            # The context manager closes the document on every exit path;
            # previously it was never closed.
            with fitz.open(stream=response.content, filetype="pdf") as pdf_document:
                if pdf_document.page_count == 0:
                    return {"error": "PDF has no pages"}

                first_page = pdf_document.load_page(0)
                text = first_page.get_text()
        except Exception as e:
            return {"error": f"Error processing PDF: {str(e)}"}

        # Return the extracted text for frontend analysis with OpenAI
        # We're not doing the analysis here as it will be done in the frontend
        return {
            "pdf_text": text,
            "score": None,
            "justification": None
        }
    except Exception as e:
        return {"error": f"Error: {str(e)}"}