Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
### Google Docs API (Fetching Text)
|
| 2 |
|
| 3 |
-
import os.path
|
| 4 |
from typing import List
|
| 5 |
|
| 6 |
from google.auth.transport.requests import Request
|
|
@@ -11,15 +11,16 @@ from googleapiclient.errors import HttpError
|
|
| 11 |
from langchain.schema import Document
|
| 12 |
|
| 13 |
|
| 14 |
-
TOKEN_PATH =
|
| 15 |
-
CREDENTIALS_PATH =
|
|
|
|
| 16 |
|
| 17 |
SCOPES = [
|
| 18 |
"https://www.googleapis.com/auth/drive.readonly",
|
| 19 |
"https://www.googleapis.com/auth/documents.readonly",
|
| 20 |
]
|
| 21 |
|
| 22 |
-
###
|
| 23 |
INITIAL_FILEID = "1xWRgZ4c6BhBV97WniRY5vIWTyGlQSMljjXggKt3jfIY"
|
| 24 |
TECHNICAL_SEO_FILEID = "1HGt1K9AbFz1GwY6jzQiVGmqZwgP6zHPwDotGD1d8bHU"
|
| 25 |
CONTENT_WRITING_FILEID = "1IdSXZwKeMo4su80s3sn4iSEQ18pvXDBsFW_uobj4zBQ"
|
|
@@ -51,16 +52,16 @@ def auth_google_docs():
|
|
| 51 |
return creds
|
| 52 |
|
| 53 |
|
| 54 |
-
|
| 55 |
creds = auth_google_docs()
|
| 56 |
### Build Google Docs Service
|
| 57 |
service = build("docs", "v1", credentials=creds)
|
| 58 |
-
|
| 59 |
doc_ids = [
|
| 60 |
INITIAL_FILEID, TECHNICAL_SEO_FILEID, CONTENT_WRITING_FILEID,
|
| 61 |
CONTENT_MARKETING_FILEID, LOCAL_SEO_FILEID
|
| 62 |
]
|
| 63 |
-
|
| 64 |
docs = []
|
| 65 |
|
| 66 |
for doc_id in doc_ids:
|
|
@@ -68,14 +69,14 @@ def auth_google_docs():
|
|
| 68 |
doc = service.documents().get(documentId=doc_id).execute()
|
| 69 |
title = doc["title"]
|
| 70 |
elements = doc["body"]["content"]
|
| 71 |
-
|
| 72 |
text = ""
|
| 73 |
for elem in elements:
|
| 74 |
if "paragraph" in elem:
|
| 75 |
for run in elem["paragraph"]["elements"]:
|
| 76 |
if "textRun" in run:
|
| 77 |
text += run["textRun"]["content"]
|
| 78 |
-
|
| 79 |
### Pydantic Document for each doc
|
| 80 |
docs.append(
|
| 81 |
Document(
|
|
@@ -83,10 +84,10 @@ def auth_google_docs():
|
|
| 83 |
metadata={"title": title, "id": doc_id}
|
| 84 |
)
|
| 85 |
)
|
| 86 |
-
|
| 87 |
except Exception as e:
|
| 88 |
print(f"{doc_id} not found: {e}")
|
| 89 |
-
|
| 90 |
|
| 91 |
return docs
|
| 92 |
|
|
|
|
| 1 |
### Google Docs API (Fetching Text)
|
| 2 |
|
| 3 |
+
import os.path, json
|
| 4 |
from typing import List
|
| 5 |
|
| 6 |
from google.auth.transport.requests import Request
|
|
|
|
| 11 |
from langchain.schema import Document
|
| 12 |
|
| 13 |
|
| 14 |
+
# Parse the OAuth token JSON stored in the GOOGLE_DOCS_TOKEN_JSON environment
# variable. `os.path` is a module and cannot be called; the value has to come
# from `os.environ`. A missing variable raises KeyError naming the variable.
# NOTE(review): despite the name, this holds the parsed JSON, not a file path.
TOKEN_PATH = json.loads(os.environ["GOOGLE_DOCS_TOKEN_JSON"])
|
| 15 |
+
# Parse the OAuth client credentials JSON stored in the
# GOOGLE_DOCS_CREDENTIALS_JSON environment variable. `os.path` is a module and
# cannot be called; the value has to come from `os.environ`. A missing
# variable raises KeyError naming the variable.
# NOTE(review): despite the name, this holds the parsed JSON, not a file path.
CREDENTIALS_PATH = json.loads(os.environ["GOOGLE_DOCS_CREDENTIALS_JSON"])
|
| 16 |
+
|
| 17 |
|
| 18 |
SCOPES = [
|
| 19 |
"https://www.googleapis.com/auth/drive.readonly",
|
| 20 |
"https://www.googleapis.com/auth/documents.readonly",
|
| 21 |
]
|
| 22 |
|
| 23 |
+
### Google Docs File IDs:
|
| 24 |
INITIAL_FILEID = "1xWRgZ4c6BhBV97WniRY5vIWTyGlQSMljjXggKt3jfIY"
|
| 25 |
TECHNICAL_SEO_FILEID = "1HGt1K9AbFz1GwY6jzQiVGmqZwgP6zHPwDotGD1d8bHU"
|
| 26 |
CONTENT_WRITING_FILEID = "1IdSXZwKeMo4su80s3sn4iSEQ18pvXDBsFW_uobj4zBQ"
|
|
|
|
| 52 |
return creds
|
| 53 |
|
| 54 |
|
| 55 |
+
def get_docs_text() -> List[Document]:
|
| 56 |
creds = auth_google_docs()
|
| 57 |
### Build Google Docs Service
|
| 58 |
service = build("docs", "v1", credentials=creds)
|
| 59 |
+
|
| 60 |
doc_ids = [
|
| 61 |
INITIAL_FILEID, TECHNICAL_SEO_FILEID, CONTENT_WRITING_FILEID,
|
| 62 |
CONTENT_MARKETING_FILEID, LOCAL_SEO_FILEID
|
| 63 |
]
|
| 64 |
+
|
| 65 |
docs = []
|
| 66 |
|
| 67 |
for doc_id in doc_ids:
|
|
|
|
| 69 |
doc = service.documents().get(documentId=doc_id).execute()
|
| 70 |
title = doc["title"]
|
| 71 |
elements = doc["body"]["content"]
|
| 72 |
+
|
| 73 |
text = ""
|
| 74 |
for elem in elements:
|
| 75 |
if "paragraph" in elem:
|
| 76 |
for run in elem["paragraph"]["elements"]:
|
| 77 |
if "textRun" in run:
|
| 78 |
text += run["textRun"]["content"]
|
| 79 |
+
|
| 80 |
### Pydantic Document for each doc
|
| 81 |
docs.append(
|
| 82 |
Document(
|
|
|
|
| 84 |
metadata={"title": title, "id": doc_id}
|
| 85 |
)
|
| 86 |
)
|
| 87 |
+
|
| 88 |
except Exception as e:
|
| 89 |
print(f"{doc_id} not found: {e}")
|
| 90 |
+
|
| 91 |
|
| 92 |
return docs
|
| 93 |
|