Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import os | |
| from dotenv import load_dotenv | |
| from langchain.document_loaders import GithubFileLoader | |
| # from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_text_splitters import CharacterTextSplitter | |
# Pull any settings defined in a local .env file into the process environment.
load_dotenv()

# Access token passed to the GitHub loader; None when the variable is not set.
GITHUB_ACCESS_TOKEN = os.environ.get("GITHUB_ACCESS_TOKEN")

# Prefix used when building browser links to files on GitHub.
GITHUB_BASE_URL = "https://github.com/"
@st.cache_resource
def get_hugging_face_model():
    """Return the embedding model used to vectorise code snippets.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the model would be rebuilt on every button click.
    ``st.cache_resource`` keeps a single shared instance alive across reruns
    (requires a Streamlit release that provides ``st.cache_resource``).

    Returns:
        A ``HuggingFaceEmbeddings`` instance for the
        ``mchochlov/codebert-base-cd-ft`` model.
    """
    model_name = "mchochlov/codebert-base-cd-ft"
    return HuggingFaceEmbeddings(model_name=model_name)
def get_similar_files(query, db, embeddings):
    """Return the entries of ``db`` closest to ``query``, paired with scores.

    The lookup is delegated entirely to ``db.similarity_search_with_score``.

    Args:
        query: Text (here, a code snippet) to search for.
        db: Vector store exposing ``similarity_search_with_score``.
        embeddings: Accepted for interface compatibility; not used here.

    Returns:
        Whatever ``db.similarity_search_with_score(query)`` returns; callers
        in this file iterate it as ``(document, score)`` pairs.
    """
    return db.similarity_search_with_score(query)
# Page heading.
st.title("Find Similar Code")

# Repository coordinates: the GitHub owner and the repository name.
USER = st.text_input("Enter the Github User", value="heaversm")
REPO = st.text_input("Enter the Github Repository", value="gdrive-docker")

# File extensions offered in the selector; only ".py" is pre-selected.
SELECTABLE_FILE_TYPES = [".py", ".ts", ".js", ".css", ".html"]
FILE_TYPES_TO_LOAD = st.multiselect(
    "Select File Types",
    SELECTABLE_FILE_TYPES,
    default=[".py"],
)

# Snippet pre-filled in the query box so the app can be tried immediately.
DEFAULT_CODE_EXAMPLE = """
def create_app():
    app = connexion.FlaskApp(__name__, specification_dir="../.openapi")
    app.add_api(
        API_VERSION, resolver=connexion.resolver.RelativeResolver("provider.app")
    )
"""
text_input = st.text_area(
    "Enter a Code Example",
    value=DEFAULT_CODE_EXAMPLE,
    height=330,
)

# True only on the script run triggered by clicking the button.
button = st.button("Find Similar Code")
def _show_similar_code(user, repo, file_types, query):
    """Search a GitHub repository for code similar to ``query`` and render it.

    Loads the files of ``user/repo`` whose names end with one of
    ``file_types``, splits them into chunks, indexes the chunks in FAISS and
    shows the best match (link, score, content) plus every returned result.

    Each failure mode is reported in the page and ends the search early
    instead of raising: missing inputs, loader errors, no matching files,
    and an empty result list.

    Args:
        user: GitHub account that owns the repository.
        repo: Repository name.
        file_types: File-name suffixes to load, e.g. ``[".py"]``.
        query: Code snippet to search for.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    user = user.strip()
    repo = repo.strip()
    if not user or not repo:
        st.warning("Please enter both a Github user and a repository.")
        return
    if not file_types:
        # str.endswith(()) is always False, so nothing would ever be loaded.
        st.warning("Please select at least one file type.")
        return
    if not query.strip():
        st.warning("Please enter a code example to search for.")
        return

    suffixes = tuple(file_types)
    try:
        loader = GithubFileLoader(
            # repo is USER/REPO
            repo=f"{user}/{repo}",
            access_token=GITHUB_ACCESS_TOKEN,
            github_api_url="https://api.github.com",
            file_filter=lambda file_path: file_path.endswith(suffixes),
        )
        documents = loader.load()
    except Exception as err:
        # UI boundary: show bad token / unknown repo / network failures in
        # the page rather than as a raw traceback.
        st.error(f"Could not load files from {user}/{repo}: {err}")
        return

    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    docs = splitter.split_documents(documents)
    if not docs:
        # Building a FAISS index from an empty document list fails.
        st.warning("No files matching the selected file types were found.")
        return

    embeddings = get_hugging_face_model()
    db = FAISS.from_documents(docs, embeddings)
    results_with_scores = get_similar_files(query, db, embeddings)
    if not results_with_scores:
        st.warning("No similar code was found.")
        return

    top_doc, top_file_score = results_with_scores[0]
    top_file_path = top_doc.metadata["path"]
    top_file_content = top_doc.page_content
    # The link targets the "main" branch; NOTE(review): confirm this matches
    # the branch the loader reads from. The path is percent-encoded so that
    # spaces or special characters do not break the markdown link.
    top_file_link = (
        f"{GITHUB_BASE_URL}{user}/{repo}/blob/main/{quote(top_file_path)}"
    )

    # write a clickable link in streamlit
    st.markdown(f"[Top file link]({top_file_link})")
    # NOTE(review): with default FAISS settings the score is presumably a
    # distance (lower means closer) - confirm against the store config.
    st.write(f"Score: {top_file_score}")
    st.code(top_file_content)

    # Show every result in the page; stdout is not visible to the app user.
    for doc, score in results_with_scores:
        st.write(f"Metadata: {doc.metadata}, Score: {score}")


if button:
    _show_similar_code(USER, REPO, FILE_TYPES_TO_LOAD, text_input)
else:
    st.info("Please Submit a Code Sample")