Spaces:
Sleeping
Sleeping
| from llama_hub.github_repo import GithubRepositoryReader, GithubClient | |
| from llama_index import download_loader, GPTVectorStoreIndex | |
| from llama_index import LLMPredictor, VectorStoreIndex, ServiceContext | |
| from langchain.llms import AzureOpenAI | |
| from langchain.embeddings.openai import OpenAIEmbeddings | |
| from llama_index import LangchainEmbedding, ServiceContext | |
| from llama_index import StorageContext, load_index_from_storage | |
| from dotenv import load_dotenv | |
| import os | |
| import pickle | |
def _load_docs(cache_path: str = "docs/docs.pkl"):
    """Return the X-Pipe markdown documents, using a local pickle cache.

    If ``cache_path`` exists, the documents are unpickled from it. Otherwise
    they are fetched from the ``ctripcorp/x-pipe`` GitHub repository
    (``master`` branch, ``.md`` files under ``.`` and ``doc``) and written to
    ``cache_path`` for the next run.

    Args:
        cache_path: Location of the pickle cache file.

    Returns:
        Whatever ``GithubRepositoryReader.load_data`` returned (either freshly
        fetched or restored from the cache).
    """
    docs = None
    if os.path.exists(cache_path):
        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # This is acceptable only because the cache is written by this script;
        # never point cache_path at a file from an untrusted source.
        with open(cache_path, "rb") as cache_file:
            docs = pickle.load(cache_file)

    # A cache that unpickles to None is treated the same as a missing cache.
    if docs is None:
        github_client = GithubClient(os.getenv("GITHUB_TOKEN"))
        loader = GithubRepositoryReader(
            github_client,
            owner="ctripcorp",
            repo="x-pipe",
            filter_directories=(
                [".", "doc"],
                GithubRepositoryReader.FilterType.INCLUDE,
            ),
            filter_file_extensions=([".md"], GithubRepositoryReader.FilterType.INCLUDE),
            verbose=True,
            concurrent_requests=10,
        )
        docs = loader.load_data(branch="master")

        # Make sure the cache directory exists; without this the very first
        # run fails with FileNotFoundError *after* the expensive download.
        cache_dir = os.path.dirname(cache_path)
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)
        with open(cache_path, "wb") as cache_file:
            pickle.dump(docs, cache_file)

    return docs


def main() -> None:
    """Index the X-Pipe docs and print the answer to one fixed question.

    Builds a service context from an Azure OpenAI LLM and OpenAI embeddings,
    loads the documents (from cache or GitHub), builds a vector index over
    them, runs a single query and prints the response to stdout.
    """
    # define embedding
    # NOTE(review): chunk_size=1 presumably works around a per-request input
    # limit on the embeddings deployment -- confirm before changing.
    embedding = LangchainEmbedding(OpenAIEmbeddings(chunk_size=1))

    # define LLM
    llm_predictor = LLMPredictor(
        llm=AzureOpenAI(
            engine="text-davinci-003",
            model_name="text-davinci-003",
        )
    )

    # configure service context
    service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor, embed_model=embedding
    )

    # NOTE(review): the return value is unused and GithubRepositoryReader is
    # already imported from llama_hub, so this call looks redundant. It is
    # kept to preserve the script's existing behaviour.
    download_loader("GithubRepositoryReader")

    docs = _load_docs()

    index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
    query_engine = index.as_query_engine(service_context=service_context)
    # The question is Chinese for "How do I use X-Pipe?".
    response = query_engine.query("如何使用X-Pipe?")
    print(response)
if __name__ == "__main__":
    # Load variables from a .env file into the environment before main()
    # runs, since main() reads GITHUB_TOKEN via os.getenv.
    load_dotenv()
    main()