Spaces:
Runtime error
Runtime error
| from langchain.llms import OpenAI | |
| from langchain.chains.qa_with_sources import load_qa_with_sources_chain | |
| from langchain.docstore.document import Document | |
| import requests | |
| import pathlib | |
| import subprocess | |
| import tempfile | |
| import os | |
| import gradio as gr | |
| import pickle | |
| from huggingface_hub import HfApi, upload_folder | |
| from huggingface_hub import whoami, list_models | |
| # using a vector space for our search | |
| from langchain.embeddings.openai import OpenAIEmbeddings | |
| from langchain.vectorstores.faiss import FAISS | |
| from langchain.text_splitter import CharacterTextSplitter | |
# Code for extracting the Markdown files from a repo.
# Fetches the Markdown files from GitHub for any (e.g. your own) repository.
def get_github_docs(repo_link):
    """Yield a ``Document`` for every Markdown file in a GitHub repository.

    The repository is cloned into a temporary directory. That directory is
    removed when the generator is exhausted or closed, so the documents must
    be consumed while iterating.

    Args:
        repo_link: Repository reference whose last two ``/``-separated parts
            are the owner and the repository name, e.g. ``owner/repo`` or
            ``https://github.com/owner/repo``. Surrounding whitespace, a
            trailing ``/`` and a ``.git`` suffix are tolerated.

    Yields:
        Document: One per ``*.md`` / ``*.mdx`` file, with the file text as
        ``page_content`` and a commit-pinned GitHub blob URL stored under
        ``metadata["source"]``.

    Raises:
        IndexError: If ``repo_link`` has fewer than two ``/``-separated parts.
        subprocess.CalledProcessError: If ``git clone`` or ``git rev-parse``
            exits with a non-zero status.
    """
    parts = repo_link.strip().rstrip("/").split("/")
    repo_owner, repo_name = parts[-2], parts[-1]
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    clone_url = f"https://github.com/{repo_owner}/{repo_name}.git"

    with tempfile.TemporaryDirectory() as d:
        # Argument lists (no shell) keep user-supplied text from being
        # interpreted as shell syntax.
        subprocess.check_call(["git", "clone", clone_url, "."], cwd=d)
        git_sha = (
            subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=d)
            .decode("utf-8")
            .strip()
        )
        repo_path = pathlib.Path(d)
        markdown_files = list(repo_path.rglob("*.md")) + list(
            repo_path.rglob("*.mdx")
        )
        for markdown_file in markdown_files:
            # Read the whole file before yielding so no handle stays open
            # while the consumer processes the document.
            try:
                text = markdown_file.read_text(encoding="utf-8")
            except OSError:
                # Covers broken symlinks, directories named *.md, etc.
                print(f"Could not open file: {markdown_file}")
                continue
            except UnicodeDecodeError:
                print(f"Could not decode file as UTF-8: {markdown_file}")
                continue
            # as_posix() keeps forward slashes in the URL on every platform.
            relative_path = markdown_file.relative_to(repo_path).as_posix()
            github_url = (
                f"https://github.com/{repo_owner}/{repo_name}"
                f"/blob/{git_sha}/{relative_path}"
            )
            yield Document(page_content=text, metadata={"source": github_url})
| #Code for creating a new space for the user | |
def create_space(repo_link, hf_token):
    """Create (or reuse) the public Gradio Space that will host the chatbot.

    Args:
        repo_link: GitHub repository reference; the text after the last ``/``
            is used as the repository name.
        hf_token: Hugging Face token used to authenticate the API client.

    Returns:
        None. The Space is created as a side effect; an already existing
        Space is accepted because ``exist_ok=True`` is passed.
    """
    project = repo_link.rsplit('/', 1)[-1]
    space_id = f'LangChain_{project}Bot'  # example - ysharma/LangChain_GradioBot
    hub = HfApi(token=hf_token)
    hub.create_repo(
        repo_id=space_id,
        repo_type="space",
        space_sdk="gradio",
        private=False,
        exist_ok=True,
    )
| #Code for creating the search index | |
| #Saving search index to disk | |
def create_search_index(repo_link, openai_api_key):
    """Build a FAISS index over a repository's Markdown and pickle it to disk.

    Every document from ``get_github_docs`` is split on spaces into chunks
    (``chunk_size=1024``, no overlap). Each chunk keeps the metadata of the
    document it came from, and the chunks are embedded with OpenAI embeddings.

    Args:
        repo_link: GitHub repository reference passed to ``get_github_docs``.
        openai_api_key: API key handed to ``OpenAIEmbeddings``.

    Returns:
        str: The path of the written pickle file, ``"search_index.pickle"``.
    """
    splitter = CharacterTextSplitter(separator=" ", chunk_size=1024, chunk_overlap=0)
    chunks = [
        Document(page_content=piece, metadata=doc.metadata)
        for doc in get_github_docs(repo_link)
        for piece in splitter.split_text(doc.page_content)
    ]

    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    index = FAISS.from_documents(chunks, embeddings)

    # Persist the FAISS search index to disk.
    index_path = "search_index.pickle"
    with open(index_path, "wb") as fh:
        pickle.dump(index, fh)
    return index_path
def upload_files_to_space(repo_link, hf_token):
    """Render the app template and upload the chatbot files to the user's Space.

    Steps, in order:
      1. Read ``template/app_og.py``, substitute ``$RepoName`` and write the
         result to ``template/app.py``.
      2. Upload ``app.py``, ``search_index.pickle`` and ``requirements.txt``
         to ``<user>/LangChain_<repo>Bot``.
      3. Delete the generated ``template/app.py`` and ``search_index.pickle``.

    Args:
        repo_link: GitHub repository reference; the text after the last ``/``
            is used as the repository name.
        hf_token: Hugging Face token used for ``whoami`` and for the uploads.

    Returns:
        str: An HTML paragraph linking to the Space.
    """
    repo_name = repo_link.split('/')[-1]
    api = HfApi(token=hf_token)
    user_name = whoami(token=hf_token)['name']
    space_name = f"{user_name}/LangChain_{repo_name}Bot"

    # Fill the repository name into the app template.
    with open("template/app_og.py", "r") as src:
        template = src.read()
    rendered = template.replace("$RepoName", repo_name)

    # Save the rendered app.py next to the template.
    with open("template/app.py", "w") as dst:
        dst.write(rendered)

    # (local path, path inside the Space), uploaded in this order.
    uploads = (
        ("template/app.py", "app.py"),
        ("search_index.pickle", "search_index.pickle"),
        ("template/requirements.txt", "requirements.txt"),
    )
    for local_path, remote_path in uploads:
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=remote_path,
            repo_id=space_name,
            token=hf_token,
            repo_type="space",
        )

    # Remove the generated files: app.py and the search index.
    for generated in ("template/app.py", "search_index.pickle"):
        os.remove(generated)

    repo_url = f"https://huggingface.co/spaces/{space_name}"
    return (
        "<p style='color: orange; text-align: center; font-size: 24px; "
        "background-color: lightgray;'>"
        "🎉Congratulations🎉 Chatbot created successfully! Access it here : "
        f"<a href={repo_url} target='_blank'>{space_name}</a></p>"
    )
def driver(repo_link, hf_token):
    """Create the user's Space and publish the chatbot files to it.

    The search index is not built here; ``upload_files_to_space`` uploads the
    ``search_index.pickle`` file that must already exist on disk.

    Args:
        repo_link: GitHub repository reference.
        hf_token: Hugging Face token.

    Returns:
        str: The HTML snippet produced by ``upload_files_to_space``.
    """
    # Step 1: make sure the target Space exists.
    create_space(repo_link, hf_token)
    # Step 2: push the files to it and keep the resulting HTML message.
    result_html = upload_files_to_space(repo_link, hf_token)
    print(f"html tag is : {result_html}")
    return result_html
def set_state():
    """Return two ``gr.update(visible=True)`` values, one per output component."""
    first_update = gr.update(visible=True)
    second_update = gr.update(visible=True)
    return first_update, second_update
| #Gradio code for Repo as input and search index as output file | |
# Gradio UI: takes a repo as input and produces the search index file.
# NOTE(review): the source reached review with its indentation flattened, so
# the Row/Column nesting below is reconstructed - confirm the layout.
_HEADER_HTML = """<div style="text-align: center; max-width: 700px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
"
>
<h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
QandA Chatbot Creator for Github Repos - Automation done using LangChain, Gradio, and Spaces
</h1>
</div>
<p style="margin-bottom: 10px; font-size: 94%">
Generate a top-notch <b>Q&A Chatbot</b> for your Github Repo, using <a href="https://langchain.readthedocs.io/en/latest/" target="_blank">LangChain</a> and <a href="https://github.com/gradio-app/gradio" target="_blank">Gradio</a>.
Paste your Github repository link, enter your OpenAI API key, and the app will create a FAISS embedding vector space for you.
Next, input your Huggingface Token and press the final button.<br><br>
Your new chatbot will be ready under your Huggingface profile, accessible via the displayed link.
<center><a href="https://huggingface.co/spaces/ysharma/LangchainBot-space-creator?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></center>
</p>
</div>"""

with gr.Blocks() as demo:
    gr.HTML(_HEADER_HTML)

    # Step 1: repository + OpenAI key -> downloadable search index.
    with gr.Row():
        with gr.Column():
            repo_link = gr.Textbox(label="Enter Github repo name")
            openai_api_key = gr.Textbox(
                type="password", label="Enter your OpenAI API key here"
            )
            btn_faiss = gr.Button("Create Search index")
        search_index_file = gr.File(label="Search index vector")

    # Step 2: hidden until the first button has been clicked (see set_state).
    with gr.Row():
        hf_token_in = gr.Textbox(
            type="password", label="Enter hf-token name", visible=False
        )
        btn_create_space = gr.Button("Create Your Chatbot", visible=False)
    html_out = gr.HTML()

    # The first button has two handlers: build the index, and reveal step 2.
    btn_faiss.click(
        fn=create_search_index,
        inputs=[repo_link, openai_api_key],
        outputs=search_index_file,
    )
    btn_faiss.click(fn=set_state, inputs=[], outputs=[hf_token_in, btn_create_space])
    btn_create_space.click(
        fn=driver, inputs=[repo_link, hf_token_in], outputs=html_out
    )

demo.queue()
demo.launch(debug=True)