Spaces:
Runtime error
Runtime error
conf files
Browse files- Dockerfile +14 -0
- api/external_services.py +111 -0
- api/main.py +59 -0
- requirements.txt +5 -0
- resources.yaml +2 -0
Dockerfile
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
|
| 2 |
+
# you will also find guides on how best to write your Dockerfile
|
| 3 |
+
|
| 4 |
+
FROM python:3.9
|
| 5 |
+
|
| 6 |
+
WORKDIR /code
|
| 7 |
+
|
| 8 |
+
COPY ./requirements.txt /code/requirements.txt
|
| 9 |
+
|
| 10 |
+
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
| 11 |
+
|
| 12 |
+
COPY . .
|
| 13 |
+
|
| 14 |
+
CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
api/external_services.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import os
|
| 3 |
+
from fastapi import HTTPException
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
from llama_index import download_loader
|
| 6 |
+
from llama_hub.github_repo import GithubRepositoryReader, GithubClient
|
| 7 |
+
from llama_index import VectorStoreIndex
|
| 8 |
+
from llama_index.vector_stores import DeepLakeVectorStore
|
| 9 |
+
from llama_index.storage.storage_context import StorageContext
|
| 10 |
+
import yaml
|
| 11 |
+
|
| 12 |
+
# Load environment variables (python-dotenv) before any key is looked up.
load_dotenv()

# The OpenAI key is read once, at import time, and importing this module
# fails immediately when it is absent or empty.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if openai_api_key is None or openai_api_key == "":
    raise EnvironmentError("OpenAI API key not found in environment variables")
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def get_validate_token(token_name):
    """Return the value of the environment variable *token_name*.

    Surrounding whitespace is stripped so that a stray space or newline does
    not become part of the credential, and a value that is empty or made of
    whitespace only is treated the same as a missing variable.

    Args:
        token_name: Name of the environment variable to read.

    Returns:
        The stripped, non-empty value of the variable.

    Raises:
        EnvironmentError: If the variable is unset, empty, or blank.
    """
    token = (os.getenv(token_name) or "").strip()
    if not token:
        raise EnvironmentError(f"{token_name} not found in environment variables")
    return token
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class InitiazlizeGithubService:
    """Load the files of a GitHub repository as documents.

    The GitHub token is read from the ``GITHUB_TOKEN`` environment variable
    when the service is constructed, and a ``GithubClient`` is built from it.
    """

    # Matches http(s)://[www.]github.com/<owner>/<repo>. Each captured segment
    # stops at the next "/", "?", "#" or whitespace, so sub-paths, query
    # strings and fragments never leak into the repository name.
    _REPO_URL_PATTERN = re.compile(
        r"https?://(?:www\.)?github\.com/([^/\s?#]+)/([^/\s?#]+)",
        re.IGNORECASE,
    )

    def __init__(self):
        self.owner = None
        self.repo = None
        self.github_token = get_validate_token("GITHUB_TOKEN")  # Check for GitHub Token
        self.github_client = self.initialize_github_client(self.github_token)
        # The return value is not used; load_repo_data() instantiates the
        # GithubRepositoryReader name that is already in module scope.
        download_loader("GithubRepositoryReader")

    def initialize_github_client(self, github_token):
        """Return a ``GithubClient`` authenticated with *github_token*."""
        return GithubClient(github_token)

    def parse_github_url(self, url):
        """Extract ``(owner, repo)`` from a GitHub repository URL.

        Accepts ``http`` and ``https`` URLs, with or without ``www.``, and
        ignores anything after the repository segment (sub-paths, query
        string, fragment). A trailing ``.git`` on the repository name is
        removed so clone URLs resolve to the same repository.

        Args:
            url: The URL supplied by the caller.

        Returns:
            A ``(owner, repo)`` tuple of strings, or ``(None, None)`` when
            *url* is not a string or does not look like a repository URL.
        """
        if not isinstance(url, str):
            return (None, None)

        match = self._REPO_URL_PATTERN.match(url.strip())
        if match is None:
            return (None, None)

        owner, repo = match.groups()
        if repo.endswith(".git"):
            repo = repo[: -len(".git")]
        if not repo:
            # The segment was nothing but ".git": there is no repository name.
            return (None, None)
        return (owner, repo)

    def validate_owner_repo(self, owner, repo):
        """Remember *owner* and *repo* on the instance if both are non-empty.

        Returns:
            ``True`` when both values are truthy (and have been stored),
            ``False`` otherwise; the stored values are left untouched then.
        """
        if owner and repo:
            self.owner = owner
            self.repo = repo
            return True

        return False

    def load_repo_data(self, owner, repo, branch="main"):
        """Fetch the repository's source and Markdown files as documents.

        Only files ending in ``.py``, ``.js``, ``.ts`` or ``.md`` are
        included.

        Args:
            owner: Repository owner, as returned by ``parse_github_url``.
            repo: Repository name, as returned by ``parse_github_url``.
            branch: Branch to read. Defaults to ``"main"``, the value that
                was previously hard-coded.

        Returns:
            The documents returned by the reader's ``load_data``.

        Raises:
            HTTPException: With status 400 when *owner* or *repo* is empty.
        """
        if not self.validate_owner_repo(owner, repo):
            raise HTTPException(
                status_code=400,
                detail="Invalid GitHub URL. Please enter a valid GitHub URL",
            )

        loader = GithubRepositoryReader(
            self.github_client,
            owner=self.owner,
            repo=self.repo,
            filter_file_extensions=(
                [".py", ".js", ".ts", ".md"],
                GithubRepositoryReader.FilterType.INCLUDE,
            ),
            verbose=False,
            concurrent_requests=5,
        )

        print(f"Loading {self.repo} repository by {self.owner}")

        docs = loader.load_data(branch=branch)
        print("Documents uploaded:")
        for doc in docs:
            print(doc.metadata)

        return docs
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
class InitiazlizeActiveloopService:
    """Store documents in a Deep Lake vector store and query them.

    The dataset location is read from the ``info.dataset_path`` entry of
    ``resources.yaml`` and the ``ACTIVELOOP_TOKEN`` environment variable must
    be set.
    """

    def __init__(self, overwrite=True):
        """Open the vector store and its storage context.

        Args:
            overwrite: Forwarded to ``DeepLakeVectorStore``. Defaults to
                ``True``, the value that was previously hard-coded; pass
                ``False`` to open the store without requesting an overwrite.
        """
        self.active_loop_token = get_validate_token(
            "ACTIVELOOP_TOKEN"
        )  # Check for Activeloop Token
        self.dataset_path = self.get_user_info("dataset_path")
        self.vector_store = DeepLakeVectorStore(
            dataset_path=f"hub://{self.dataset_path}",
            overwrite=overwrite,
            runtime={"tensor_db": True},
        )

        self.storage_context = StorageContext.from_defaults(
            vector_store=self.vector_store
        )

        # Both are filled in by upload_to_activeloop(), or on first access of
        # ``query_engine``; defining them here means reading them before an
        # upload no longer raises AttributeError.
        self.index = None
        self._query_engine = None

    @property
    def query_engine(self):
        """Query engine over the vector store, created on first use.

        When no upload has happened on this instance, an index is built
        directly on top of ``self.vector_store`` so the instance can still be
        queried.
        """
        if self._query_engine is None:
            self.index = VectorStoreIndex.from_vector_store(self.vector_store)
            self._query_engine = self.index.as_query_engine()
        return self._query_engine

    @query_engine.setter
    def query_engine(self, engine):
        # Kept so code that assigns ``service.query_engine`` still works.
        self._query_engine = engine

    def upload_to_activeloop(self, docs):
        """Index *docs* into the vector store and refresh the query engine.

        Args:
            docs: Documents to index, e.g. the result of
                ``InitiazlizeGithubService.load_repo_data``.
        """
        self.index = VectorStoreIndex.from_documents(
            docs, storage_context=self.storage_context
        )
        self.query_engine = self.index.as_query_engine()

    def get_user_info(self, user_info):
        """Return the ``info.<user_info>`` entry of ``resources.yaml``.

        The file is looked up relative to the current working directory.

        Args:
            user_info: Key to read from the ``info`` mapping.

        Returns:
            The value stored under that key.

        Raises:
            KeyError: If the ``info`` mapping or the requested key is missing.
            FileNotFoundError: If ``resources.yaml`` does not exist.
        """
        with open("resources.yaml", "r", encoding="utf-8") as file:
            yaml_data = yaml.safe_load(file)

        # An empty file parses to None; treat it like a file without "info".
        info = (yaml_data or {}).get("info") or {}
        if user_info not in info:
            raise KeyError(f"'{user_info}' not found under 'info' in resources.yaml")
        return info[user_info]
|
api/main.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import textwrap
from fastapi import FastAPI
from pydantic import BaseModel
from dotenv import load_dotenv

# When the server is started as ``uvicorn api.main:app`` this module is
# imported as part of the ``api`` package, so the sibling module has to be
# imported relatively. The bare import is kept as a fallback for running from
# inside the ``api`` directory (``uvicorn main:app``), where this module has no
# parent package.
try:
    from .external_services import (
        InitiazlizeActiveloopService,
        InitiazlizeGithubService,
    )
except ImportError:
    from external_services import (
        InitiazlizeActiveloopService,
        InitiazlizeGithubService,
    )

# Load environment variables
load_dotenv()


app = FastAPI()
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class GitHubRepoRequest(BaseModel):
    """JSON body carrying the repository to process."""

    # Repository URL exactly as entered by the user.
    githubRepoUrl: str
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class UserCodeRequest(BaseModel):
    """JSON body carrying the text to send to the query engine."""

    # Code or question text exactly as entered by the user.
    userCode: str
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# Service populated by the most recent successful /upload call. It is kept at
# module level so /retrieve can query the index that the upload built. This is
# per-process state: it is not shared between workers and is lost on restart.
_uploaded_service = None


@app.post("/upload")
async def scrape_and_upload_to_activeloop(repo_request: GitHubRepoRequest):
    """Load a GitHub repository and index it into the Activeloop store.

    Args:
        repo_request: Request body holding the repository URL.

    Returns:
        A ``{"status", "message"}`` dictionary on success.

    Raises:
        Whatever ``load_repo_data`` raises for a URL that does not yield an
        owner and a repository name.
    """
    global _uploaded_service

    github_service = InitiazlizeGithubService()

    print(f"repo from user: {repo_request.githubRepoUrl}")

    owner, repo = github_service.parse_github_url(repo_request.githubRepoUrl)
    docs = github_service.load_repo_data(owner, repo)

    # Created only after the repository has been loaded, so a rejected URL
    # never reaches the vector-store set-up.
    activeloop_service = InitiazlizeActiveloopService()
    activeloop_service.upload_to_activeloop(docs)
    _uploaded_service = activeloop_service

    return {"status": "success", "message": "Repo processed successfully"}


@app.post("/retrieve")
async def find_similar_code_and_explain(code_request: UserCodeRequest):
    """Run the user's text as a query against the uploaded repository.

    Args:
        code_request: Request body holding the text to query with.

    Returns:
        A dictionary with the query engine's response under ``"answer"``.

    Raises:
        HTTPException: With status 409 when no repository has been uploaded
            in this process yet.
    """
    # Imported here because the module-level import only brings in FastAPI.
    from fastapi import HTTPException

    # Reuse the service that /upload populated: upload_to_activeloop() is what
    # creates its query engine, so a service built from scratch here would not
    # be backed by the uploaded documents.
    activeloop_service = _uploaded_service
    if activeloop_service is None:
        raise HTTPException(
            status_code=409,
            detail="No repository has been uploaded yet. Call /upload first.",
        )

    print(f"code from user: {code_request.userCode}")

    intro_question = code_request.userCode
    print(f"Test question: {intro_question}")
    print("=" * 50)

    answer = activeloop_service.query_engine.query(intro_question)
    print(f"Answer: {textwrap.fill(str(answer), 100)} \n")

    return {
        "answer": answer,
    }
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
llama-index==0.9.38
|
| 2 |
+
deeplake==3.8.17
|
| 3 |
+
openai==1.10.0
|
| 4 |
+
python-dotenv==1.0.1
|
| 5 |
+
llama-hub==0.0.76
|
resources.yaml
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
info:
|
| 2 |
+
dataset_path: manufe_test/code_retriever
|