Spaces:
Runtime error
Runtime error
| # import json | |
| # from facebook_page_scraper import Facebook_scraper | |
| # from facebook_page_scraper import Facebook_scraper | |
| # from config import * | |
| # #instantiate the Facebook_scraper class | |
| # page_name = "fptsoftware.official" | |
| # posts_count = 15 | |
| # browser = "firefox" | |
| # timeout = 600 #600 seconds | |
| # headless = True | |
| # meta_ai = Facebook_scraper(page_name, posts_count, browser, timeout=timeout, headless=headless) | |
| # json_data = meta_ai.scrap_to_json() | |
| # with open('data.json', 'w') as f: | |
| # json.dump(json_data, f) | |
| import json | |
| from pydantic import Field | |
| from langchain.load.serializable import Serializable | |
| import pinecone | |
| # from langchain.vectorstores import Pinecone | |
| from custom_vectordb import Pinecone | |
| from config import PINECONE_API_KEY, PINECONE_ENVIRONMENT, INDEX_NAME, CONNECTION_STRING, CONTAINER_NAME, NAME_SPACE_1, NAME_SPACE_2 | |
| from config import EMBEDDING_API_BASE, EMBEDDING_API_KEY, OPENAI_API_TYPE, OPENAI_API_VERSION, EMBEDDING_DEPLOYMENT_ID | |
| from langchain.embeddings import OpenAIEmbeddings | |
| import ast | |
# Load the scraper output stored in data.json. The encoding is pinned to UTF-8
# so decoding does not depend on the platform's locale default.
with open('data.json', encoding='utf-8') as json_file:
    data = json.load(json_file)
class Document(Serializable):
    """A piece of text together with the metadata that describes it."""

    # The text itself.
    page_content: str

    # Arbitrary metadata about the page content (e.g. source, relationships
    # to other documents, etc.). Each instance gets its own fresh dict.
    metadata: dict = Field(default_factory=dict)
# Decode the scraped posts into a mapping.
# The commented-out producer at the top of this file stores the return value of
# scrap_to_json() via json.dump, so `data` is presumably a JSON-encoded string
# (TODO confirm against the scraper). json.loads is the right decoder for that:
# ast.literal_eval rejects the JSON tokens true/false/null. literal_eval is
# kept as a fallback for payloads written as a Python literal, and an
# already-decoded value is used as-is.
if isinstance(data, str):
    try:
        datas = json.loads(data)
    except json.JSONDecodeError:
        datas = ast.literal_eval(data)
else:
    datas = data

# Build every document BEFORE touching the index, so that a malformed post
# (e.g. a missing "content" key) fails here instead of after the namespace
# has already been wiped.
texts = []
for post in datas.values():
    # Keep only the text before the first "-----" separator and append the
    # image link(s) so they are embedded together with the post text.
    content = post["content"].split("-----")[0] + "\nimage_link: " + str(post["image"])
    post_url = post["post_url"]
    texts.append(Document(page_content=content, metadata={"source": post_url}))
print(len(texts))

# Embedding client; created before the delete for the same fail-early reason.
embeddings = OpenAIEmbeddings(
    deployment=EMBEDDING_DEPLOYMENT_ID,
    openai_api_key=EMBEDDING_API_KEY,
    openai_api_base=EMBEDDING_API_BASE,
    openai_api_type=OPENAI_API_TYPE,
    openai_api_version=OPENAI_API_VERSION,
    chunk_size=16,
)

# initialize pinecone
pinecone.init(
    api_key=PINECONE_API_KEY,  # find at app.pinecone.io
    environment=PINECONE_ENVIRONMENT,  # next to api key in console
)
index = pinecone.Index(INDEX_NAME)

# Clear the target namespace so the upload below replaces its previous content.
index.delete(delete_all=True, namespace=NAME_SPACE_2)

if texts:
    Pinecone.from_documents(texts, embeddings, index_name=INDEX_NAME, namespace=NAME_SPACE_2)
    message = f"Add facebook data to space {NAME_SPACE_2} in {INDEX_NAME} sucessfully"
    print(message)