Spaces:

Ephraimmm
/

studybuddy

Sleeping

App Files Community

Ephraimmm committed on Jun 23

Commit

1e3b610

verified ·

1 Parent(s): f5373d7

Update app.py

Browse files

Files changed (1) hide show

app.py +114 -50

app.py CHANGED Viewed

@@ -51,64 +51,128 @@ class GPTDriveIntegration:
     def get_file_content(self, file_id, mime_type):
         """Download and extract text content from file"""
-    try:
-        if 'text' in mime_type or 'document' in mime_type:
-            # For Google Docs, export as plain text
-            if 'document' in mime_type:
                 request = self.drive_service.files().export_media(
-                    fileId=file_id, mimeType='text/plain'
                 )
-            else:
-                request = self.drive_service.files().get_media(fileId=file_id)
-            file_content = io.BytesIO()
-            downloader = MediaIoBaseDownload(file_content, request)
-            done = False
-            while done is False:
-                status, done = downloader.next_chunk()
-            return file_content.getvalue().decode('utf-8')
-        elif 'spreadsheet' in mime_type:
-            # For Google Sheets, export as CSV
-            request = self.drive_service.files().export_media(
-                fileId=file_id, mimeType='text/csv'
-            )
-            file_content = io.BytesIO()
-            downloader = MediaIoBaseDownload(file_content, request)
-            done = False
-            while done is False:
-                status, done = downloader.next_chunk()
-        return file_content.getvalue().decode('utf-8')
-        elif mime_type == 'application/pdf':
-            # For PDF files, download binary content and extract text
-            request = self.drive_service.files().get_media(fileId=file_id)
-            file_content = io.BytesIO()
-            downloader = MediaIoBaseDownload(file_content, request)
-            done = False
-            while done is False:
-                status, done = downloader.next_chunk()
-            # Extract text from PDF
-            file_content.seek(0)
-            try:
-                import PyPDF2
-                pdf_reader = PyPDF2.PdfReader(file_content)
-                text = ""
-                for page in pdf_reader.pages:
-                    text += page.extract_text() + "\n"
-                return text
-            except ImportError:
-                return "PDF text extraction requires PyPDF2 library"
         else:
-            return "File type not supported for text extraction"
-    except Exception as e:
-        return f"Error reading file: {str(e)}"
     def query_gpt_with_context(self, user_query, file_contents):
         """Send query to GPT with file context"""

     def get_file_content(self, file_id, mime_type):
         """Download and extract text content from file"""
+        try:
+            if 'text' in mime_type or 'document' in mime_type:
+                # For Google Docs, export as plain text
+                if 'document' in mime_type:
+                    request = self.drive_service.files().export_media(
+                        fileId=file_id, mimeType='text/plain'
+                    )
+                else:
+                    request = self.drive_service.files().get_media(fileId=file_id)
+                file_content = io.BytesIO()
+                downloader = MediaIoBaseDownload(file_content, request)
+                done = False
+                while done is False:
+                    status, done = downloader.next_chunk()
+                return file_content.getvalue().decode('utf-8')
+            elif 'spreadsheet' in mime_type:
+                # For Google Sheets, export as CSV
                 request = self.drive_service.files().export_media(
+                    fileId=file_id, mimeType='text/csv'
                 )
+                file_content = io.BytesIO()
+                downloader = MediaIoBaseDownload(file_content, request)
+                done = False
+                while done is False:
+                    status, done = downloader.next_chunk()
+                return file_content.getvalue().decode('utf-8')
+            elif mime_type == 'application/pdf':
+                # For PDF files, download binary content and extract text
+                request = self.drive_service.files().get_media(fileId=file_id)
+                file_content = io.BytesIO()
+                downloader = MediaIoBaseDownload(file_content, request)
+                done = False
+                while done is False:
+                    status, done = downloader.next_chunk()
+                # Extract text from PDF
+                file_content.seek(0)
+                try:
+                    import PyPDF2
+                    pdf_reader = PyPDF2.PdfReader(file_content)
+                    text = ""
+                    for page in pdf_reader.pages:
+                        text += page.extract_text() + "\n"
+                    return text
+                except ImportError:
+                    return "PDF text extraction requires PyPDF2 library"
+            else:
+                return "File type not supported for text extraction"
+        except Exception as e:
+            return f"Error reading file: {str(e)}"
+    def query_gpt_with_context(self, user_query, file_contents):
+        """Send query to GPT with file context"""
+        context = "\n\n".join([
+            f"File: {content['name']}\nContent: {content['text'][:2000]}..."
+            for content in file_contents
+        ])
+        messages = [
+            {
+                "role": "system",
+                "content": """
+                You are an AI assistant that can analyze documents from Google Drive.
+                Use the provided file contents to answer user questions."""
+            },
+            {
+                "role": "user",
+                "content": f"Context from Google Drive files:\n{context}\n\nUser Question: {user_query}"
+            }
+        ]
+        response = openai.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=messages,
+            max_tokens=1000
+        )
+        return response.choices[0].message.content
+    def process_query(self, user_query, search_terms=None):
+        """Main function to process user queries"""
+        # Extract search terms from query if not provided
+        if not search_terms:
+            search_terms = user_query.split()[:3]  # Simple extraction
+        # Search for relevant files
+        files = []
+        for term in search_terms:
+            files.extend(self.search_files(term))
+        # Remove duplicates
+        unique_files = {f['id']: f for f in files}.values()
+        # Get content from top 3 most relevant files
+        file_contents = []
+        for file in list(unique_files)[:3]:
+            content = self.get_file_content(file['id'], file['mimeType'])
+            file_contents.append({
+                'name': file['name'],
+                'text': content
+            })
+        # Query GPT with context
+        if file_contents:
+            response = self.query_gpt_with_context(user_query, file_contents)
+            return {
+                'answer': response,
+                'sources': [f['name'] for f in file_contents]
+            }
         else:
+            return {
+                'answer': "No relevant files found in your Google Drive.",
+                'sources': []
+            }
     def query_gpt_with_context(self, user_query, file_contents):
         """Send query to GPT with file context"""