Ephraimmm committed on
Commit
1e3b610
·
verified ·
1 Parent(s): f5373d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -50
app.py CHANGED
@@ -51,64 +51,128 @@ class GPTDriveIntegration:
51
 
52
  def get_file_content(self, file_id, mime_type):
53
  """Download and extract text content from file"""
54
- try:
55
- if 'text' in mime_type or 'document' in mime_type:
56
- # For Google Docs, export as plain text
57
- if 'document' in mime_type:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  request = self.drive_service.files().export_media(
59
- fileId=file_id, mimeType='text/plain'
60
  )
61
- else:
62
- request = self.drive_service.files().get_media(fileId=file_id)
 
 
 
 
 
63
 
64
- file_content = io.BytesIO()
65
- downloader = MediaIoBaseDownload(file_content, request)
66
- done = False
67
- while done is False:
68
- status, done = downloader.next_chunk()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
- return file_content.getvalue().decode('utf-8')
 
 
 
 
 
 
 
 
 
 
 
71
 
72
- elif 'spreadsheet' in mime_type:
73
- # For Google Sheets, export as CSV
74
- request = self.drive_service.files().export_media(
75
- fileId=file_id, mimeType='text/csv'
76
- )
77
- file_content = io.BytesIO()
78
- downloader = MediaIoBaseDownload(file_content, request)
79
- done = False
80
- while done is False:
81
- status, done = downloader.next_chunk()
82
-
83
- return file_content.getvalue().decode('utf-8')
84
 
85
- elif mime_type == 'application/pdf':
86
- # For PDF files, download binary content and extract text
87
- request = self.drive_service.files().get_media(fileId=file_id)
88
- file_content = io.BytesIO()
89
- downloader = MediaIoBaseDownload(file_content, request)
90
- done = False
91
- while done is False:
92
- status, done = downloader.next_chunk()
93
-
94
- # Extract text from PDF
95
- file_content.seek(0)
96
-
97
- try:
98
- import PyPDF2
99
- pdf_reader = PyPDF2.PdfReader(file_content)
100
- text = ""
101
- for page in pdf_reader.pages:
102
- text += page.extract_text() + "\n"
103
- return text
104
- except ImportError:
105
- return "PDF text extraction requires PyPDF2 library"
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  else:
108
- return "File type not supported for text extraction"
109
-
110
- except Exception as e:
111
- return f"Error reading file: {str(e)}"
112
 
113
  def query_gpt_with_context(self, user_query, file_contents):
114
  """Send query to GPT with file context"""
 
51
 
52
  def get_file_content(self, file_id, mime_type):
53
  """Download and extract text content from file"""
54
+ try:
55
+ if 'text' in mime_type or 'document' in mime_type:
56
+ # For Google Docs, export as plain text
57
+ if 'document' in mime_type:
58
+ request = self.drive_service.files().export_media(
59
+ fileId=file_id, mimeType='text/plain'
60
+ )
61
+ else:
62
+ request = self.drive_service.files().get_media(fileId=file_id)
63
+
64
+ file_content = io.BytesIO()
65
+ downloader = MediaIoBaseDownload(file_content, request)
66
+ done = False
67
+ while done is False:
68
+ status, done = downloader.next_chunk()
69
+
70
+ return file_content.getvalue().decode('utf-8')
71
+
72
+ elif 'spreadsheet' in mime_type:
73
+ # For Google Sheets, export as CSV
74
  request = self.drive_service.files().export_media(
75
+ fileId=file_id, mimeType='text/csv'
76
  )
77
+ file_content = io.BytesIO()
78
+ downloader = MediaIoBaseDownload(file_content, request)
79
+ done = False
80
+ while done is False:
81
+ status, done = downloader.next_chunk()
82
+
83
+ return file_content.getvalue().decode('utf-8')
84
 
85
+ elif mime_type == 'application/pdf':
86
+ # For PDF files, download binary content and extract text
87
+ request = self.drive_service.files().get_media(fileId=file_id)
88
+ file_content = io.BytesIO()
89
+ downloader = MediaIoBaseDownload(file_content, request)
90
+ done = False
91
+ while done is False:
92
+ status, done = downloader.next_chunk()
93
+
94
+ # Extract text from PDF
95
+ file_content.seek(0)
96
+
97
+ try:
98
+ import PyPDF2
99
+ pdf_reader = PyPDF2.PdfReader(file_content)
100
+ text = ""
101
+ for page in pdf_reader.pages:
102
+ text += page.extract_text() + "\n"
103
+ return text
104
+ except ImportError:
105
+ return "PDF text extraction requires PyPDF2 library"
106
 
107
+ else:
108
+ return "File type not supported for text extraction"
109
+
110
+ except Exception as e:
111
+ return f"Error reading file: {str(e)}"
112
+
113
+ def query_gpt_with_context(self, user_query, file_contents):
114
+ """Send query to GPT with file context"""
115
+ context = "\n\n".join([
116
+ f"File: {content['name']}\nContent: {content['text'][:2000]}..."
117
+ for content in file_contents
118
+ ])
119
 
120
+ messages = [
121
+ {
122
+ "role": "system",
123
+ "content": """
124
+ You are an AI assistant that can analyze documents from Google Drive.
125
+ Use the provided file contents to answer user questions."""
126
+ },
127
+ {
128
+ "role": "user",
129
+ "content": f"Context from Google Drive files:\n{context}\n\nUser Question: {user_query}"
130
+ }
131
+ ]
132
 
133
+ response = openai.chat.completions.create(
134
+ model="gpt-4o-mini",
135
+ messages=messages,
136
+ max_tokens=1000
137
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
+ return response.choices[0].message.content
140
+
141
+ def process_query(self, user_query, search_terms=None):
142
+ """Main function to process user queries"""
143
+ # Extract search terms from query if not provided
144
+ if not search_terms:
145
+ search_terms = user_query.split()[:3] # Simple extraction
146
+
147
+ # Search for relevant files
148
+ files = []
149
+ for term in search_terms:
150
+ files.extend(self.search_files(term))
151
+
152
+ # Remove duplicates
153
+ unique_files = {f['id']: f for f in files}.values()
154
+
155
+ # Get content from top 3 most relevant files
156
+ file_contents = []
157
+ for file in list(unique_files)[:3]:
158
+ content = self.get_file_content(file['id'], file['mimeType'])
159
+ file_contents.append({
160
+ 'name': file['name'],
161
+ 'text': content
162
+ })
163
+
164
+ # Query GPT with context
165
+ if file_contents:
166
+ response = self.query_gpt_with_context(user_query, file_contents)
167
+ return {
168
+ 'answer': response,
169
+ 'sources': [f['name'] for f in file_contents]
170
+ }
171
  else:
172
+ return {
173
+ 'answer': "No relevant files found in your Google Drive.",
174
+ 'sources': []
175
+ }
176
 
177
  def query_gpt_with_context(self, user_query, file_contents):
178
  """Send query to GPT with file context"""