Spaces:
Sleeping
Sleeping
Upload 11 files
Browse files- __init__.py +0 -0
- about.py +48 -0
- app.py +100 -0
- display.py +53 -0
- download.py +20 -0
- forms.py +86 -0
- main.py +6 -0
- query.py +149 -0
- repo.py +57 -0
- requirements.txt +5 -0
- utils.py +49 -0
__init__.py
ADDED
|
File without changes
|
about.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
about_section = """
|
| 2 |
+
## About ChatGPT Code Review
|
| 3 |
+
|
| 4 |
+
ChatGPT Code Review is an app designed to help software developers improve
|
| 5 |
+
their code quality by leveraging the power of OpenAI's large language models.
|
| 6 |
+
The app analyzes the code in a given GitHub repository and provides
|
| 7 |
+
recommendations to enhance the code. It is a valuable tool for developers,
|
| 8 |
+
allowing them to discover potential issues in their codebase.
|
| 9 |
+
|
| 10 |
+
To use ChatGPT Code Review and get recommendations for your code, follow these
|
| 11 |
+
steps:
|
| 12 |
+
|
| 13 |
+
1. **Access the app**: Open the ChatGPT Code Review app in your web browser.
|
| 14 |
+
2. **Enter the GitHub repository URL**: In the input field labeled "GitHub
|
| 15 |
+
Repository URL", enter the URL of the repository you'd like to analyze.
|
| 16 |
+
3. **Enter your OpenAI API Key**: In the input field labeled "OpenAI API Key",
|
| 17 |
+
enter your OpenAI API key. If you don't have one, you can obtain it from the
|
| 18 |
+
[OpenAI platform](https://platform.openai.com/account/api-keys).
|
| 19 |
+
4. **Select file extensions**: Choose the file extensions you want to analyze
|
| 20 |
+
or add additional extensions in the provided input field.
|
| 21 |
+
5. **Clone the repository**: Click the "Clone Repository" button. The app will
|
| 22 |
+
display the files available for analysis in a tree structure.
|
| 23 |
+
6. **Select files to analyze**: Check the boxes next to the files you want to
|
| 24 |
+
analyze, then click the "Analyze Files" button.
|
| 25 |
+
7. **Review the recommendations**: The recommendations will be displayed in a
|
| 26 |
+
clear and structured format, with code snippets and suggested improvements.
|
| 27 |
+
Review these recommendations to identify potential areas for improvement in
|
| 28 |
+
your code.
|
| 29 |
+
|
| 30 |
+
Keep in mind that ChatGPT Code Review is an AI-powered tool, and while it can
|
| 31 |
+
provide helpful insights, it may not always be perfect. It is essential to use
|
| 32 |
+
your judgment and expertise when assessing the recommendations provided by the
|
| 33 |
+
app.
|
| 34 |
+
|
| 35 |
+
<!--don't underline links -->
|
| 36 |
+
<style>
|
| 37 |
+
a {
|
| 38 |
+
text-decoration: none;
|
| 39 |
+
}
|
| 40 |
+
</style>
|
| 41 |
+
|
| 42 |
+
### Links
|
| 43 |
+
|
| 44 |
+
- [OpenAI](https://openai.com/)
|
| 45 |
+
- [API Keys](https://platform.openai.com/account/api-keys)
|
| 46 |
+
- [GitHub](https://github.com/domvwt/chatgpt-code-review)
|
| 47 |
+
|
| 48 |
+
"""
|
app.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
import about
|
| 4 |
+
import display
|
| 5 |
+
import download
|
| 6 |
+
import forms
|
| 7 |
+
import query
|
| 8 |
+
import repo
|
| 9 |
+
import streamlit as st
|
| 10 |
+
import utils
|
| 11 |
+
|
| 12 |
+
# Optional KEY=VALUE file loaded into the environment at startup.
env_file_path = ".env"
# Log file handed to utils.configure_logging().
log_file = "app.log"


# Directory the app switches into while it runs; clones land inside it.
temp_dir = "/tmp/chatgpt-code-review"


def app():
    """Render the ChatGPT Code Review page.

    Flow: load environment and logging configuration, show the repository
    form, list the matching files of the cloned repository, let the user pick
    files, then display one recommendation section per file and offer the
    result as a Markdown download.

    State that must survive Streamlit reruns (``code_files``,
    ``selected_files``, ``analyze_files``, ``recommendation_list``) is kept
    in ``st.session_state``.
    """
    utils.load_environment_variables(env_file_path)
    utils.set_environment_variables()
    utils.configure_logging(log_file)

    with utils.TempDirContext(temp_dir):
        st.set_page_config(
            page_title="ChatGPT Code Review",
        )

        session_state = st.session_state

        st.title("ChatGPT Code Review :rocket:")

        with st.expander("About ChatGPT Code Review"):
            st.markdown(about.about_section, unsafe_allow_html=True)
            st.write("")

        default_repo_url = "https://github.com/domvwt/chatgpt-code-review"
        repo_form = forms.RepoForm(default_repo_url)
        with st.form("repo_url_form"):
            repo_form.display_form()

        # Check if the API key is valid before proceeding
        if repo_form.clone_repo_button and not repo_form.is_api_key_valid():
            st.stop()

        repo_url, extensions = repo_form.get_form_data()

        analyze_files_form = forms.AnalyzeFilesForm(session_state)
        with st.form("analyze_files_form"):
            if repo_form.clone_repo_button or session_state.get("code_files"):
                if repo_form.clone_repo_button:
                    # An explicit clone request always refreshes the listing;
                    # otherwise a second repository would keep showing the
                    # files cached for the first one.
                    session_state.code_files = (
                        repo.list_code_files_in_repository(
                            repo_url, extensions
                        )
                    )
                    # Selection and results belong to the previous listing.
                    for stale_key in (
                        "selected_files",
                        "analyze_files",
                        "recommendation_list",
                    ):
                        if stale_key in session_state:
                            del session_state[stale_key]

                analyze_files_form.display_form()

        # Analyze the selected files
        with st.spinner("Analyzing files..."):
            if session_state.get("analyze_files"):
                if session_state.get("selected_files"):
                    recommendations = query.analyze_code_files(
                        session_state.selected_files
                    )

                    # Display the recommendations
                    st.header("Recommendations")
                    recommendation_list = []
                    for index, rec in enumerate(recommendations):
                        if index:
                            # Horizontal rule between consecutive files.
                            st.write("---")
                        st.subheader(display.escape_markdown(rec["code_file"]))
                        recommendation = (
                            rec["recommendation"] or "No recommendations"
                        )
                        st.markdown(recommendation)
                        with st.expander("View Code"):
                            extension = os.path.splitext(rec["code_file"])[1]
                            display.display_code(
                                rec["code_snippet"], extension
                            )
                        recommendation_list.append(rec)
                    if recommendation_list:
                        session_state.recommendation_list = recommendation_list
                else:
                    st.error("Please select at least one file to analyze.")
                    st.stop()

        st.write("")

        download.download_markdown(session_state.get("recommendation_list"))


if __name__ == "__main__":
    app()
|
display.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
from typing import Optional
|
| 3 |
+
|
| 4 |
+
import streamlit as st
|
| 5 |
+
from utils import EXTENSION_TO_LANGUAGE_MAP
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def extension_to_language(file_extension: str) -> Optional[str]:
    """Look up the language registered for ``file_extension``.

    The extension is lower-cased before the lookup in
    ``EXTENSION_TO_LANGUAGE_MAP``; ``None`` is returned when it is not a key.
    """
    normalized = file_extension.lower()
    if normalized in EXTENSION_TO_LANGUAGE_MAP:
        return EXTENSION_TO_LANGUAGE_MAP[normalized]
    return None
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def display_code(code: str, extension: str) -> None:
    """Render ``code`` as a fenced Markdown code block.

    Args:
        code: Source text to show.
        extension: File extension (e.g. ``".py"``) used to pick the language
            tag of the fence via ``extension_to_language``.
    """
    # Unknown extensions yield None; use an untagged fence rather than
    # emitting the literal text "None" as the language.
    language = extension_to_language(extension) or ""
    # The fence must be longer than any backtick run inside the code,
    # otherwise the snippet would terminate the block early.
    longest_run = max((len(run) for run in re.findall(r"`+", code)), default=0)
    fence = "`" * max(3, longest_run + 1)
    markdown_code = f"{fence}{language}\n{code}\n{fence}"
    st.markdown(markdown_code)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def escape_markdown(text: str) -> str:
    """Return ``text`` with Markdown punctuation backslash-escaped.

    Each of the characters ``\\ ` * _ { } [ ] ( ) # + - . !`` is prefixed
    with a single backslash; all other characters pass through unchanged.
    """
    special = "\\`*_{}[]()#+-.!"
    return "".join("\\" + ch if ch in special else ch for ch in text)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def generate_markdown(recommendations):
    """Build the Markdown report for a sequence of recommendation dicts.

    Each entry contributes a ``## <code_file>`` heading followed by its
    ``recommendation`` text, or "No recommendations" when that is empty.
    """
    pieces = ["# ChatGPT Code Review Recommendations\n\n"]

    for entry in recommendations:
        heading = entry["code_file"]
        body = entry["recommendation"] or "No recommendations"
        pieces.extend((f"## {heading}\n\n", f"{body}\n\n"))

    return "".join(pieces)
|
download.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import display
|
| 2 |
+
import streamlit as st
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def download_markdown(recommendations):
    """Show the "Download Markdown" button.

    With recommendations the button serves the report produced by
    ``display.generate_markdown``; without any it is rendered disabled with
    empty data.
    """
    shared = {
        "file_name": "chatgpt_recommendations.md",
        "mime": "text/markdown",
    }
    if not recommendations:
        st.download_button(
            "Download Markdown",
            data="",
            disabled=True,
            **shared,
        )
        return
    st.download_button(
        "Download Markdown",
        data=display.generate_markdown(recommendations),
        **shared,
    )
|
forms.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
import openai
|
| 5 |
+
import repo
|
| 6 |
+
import streamlit as st
|
| 7 |
+
from streamlit_tree_select import tree_select
|
| 8 |
+
from utils import EXTENSION_TO_LANGUAGE_MAP
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class RepoForm:
    """A class to encapsulate the repository form and its operations."""

    # Extensions offered in the multiselect; all of them are pre-selected.
    options = list(EXTENSION_TO_LANGUAGE_MAP)

    def __init__(self, default_repo_url: str):
        self.default_repo_url = default_repo_url
        self.repo_url = ""
        self.api_key = ""
        self.extensions = []
        self.additional_extensions = ""
        # Defined here so the attribute can be read before display_form().
        self.clone_repo_button = False

    def display_form(self):
        """Displays the repository form and its elements.

        Must be called inside an ``st.form`` context because it ends with a
        form submit button. Also stores the entered key on the ``openai``
        module.
        """
        self.repo_url = st.text_input(
            "GitHub Repository URL:", self.default_repo_url
        )

        env_api_key = os.getenv("OPENAI_API_KEY", "")
        # Mask the secret on screen and drop whitespace picked up when pasting.
        self.api_key = st.text_input(
            "OpenAI API Key:",
            env_api_key,
            type="password",
            placeholder="Paste your API key here",
        ).strip()
        openai.api_key = self.api_key

        selected = st.multiselect(
            "File extensions to analyze",
            options=self.options,
            default=self.options,
        )
        self.additional_extensions = st.text_input(
            "Additional file extensions to analyze (comma-separated):"
        )
        # Blank entries (e.g. from a trailing comma) are discarded: an empty
        # suffix would match every file name.
        extra = [
            ext.strip()
            for ext in self.additional_extensions.split(",")
            if ext.strip()
        ]
        # Build a fresh list instead of mutating the widget's return value.
        self.extensions = list(selected) + extra

        self.clone_repo_button = st.form_submit_button("Clone Repository")

    def get_form_data(self):
        """Returns the data captured by the repository form.

        Returns:
            A ``(repo_url, extensions)`` tuple.
        """
        return (
            self.repo_url,
            self.extensions,
        )

    def is_api_key_valid(self):
        """Checks that an API key was entered and returns a boolean value.

        Only presence is checked; the key is not verified against OpenAI.
        An error message is shown in the UI when the key is missing.
        """
        if not self.api_key or not self.api_key.strip():
            st.error("Please enter your OpenAI API key.")
            return False
        return True
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class AnalyzeFilesForm:
    """File-selection form backed by the shared session state."""

    def __init__(self, session_state):
        self.session_state = session_state

    def display_form(self):
        """Render the file tree and the "Analyze Files" submit button.

        Writes the checked files to ``session_state.selected_files`` and
        keeps ``session_state.analyze_files`` truthy once the button has
        been pressed.
        """
        state = self.session_state
        st.write("Select files to analyze:")
        tree_nodes = repo.create_file_tree(state.code_files)
        selection = tree_select(
            tree_nodes,
            show_expand_all=True,
            check_model="leaf",
            checked=state.get("selected_files"),
        )
        state.selected_files = selection["checked"]
        logging.info("Selected files: %s", state.selected_files)
        submitted = st.form_submit_button("Analyze Files")
        # The flag is sticky: a previously stored truthy value is retained.
        state.analyze_files = submitted or state.get("analyze_files")
|
main.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# NOTE: Keeping this so we don't have to redeploy the Streamlit app
|
| 2 |
+
from app import app
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
if __name__ == "__main__":
|
| 6 |
+
app()
|
query.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from textwrap import dedent
|
| 3 |
+
from typing import Iterable
|
| 4 |
+
|
| 5 |
+
import openai
|
| 6 |
+
import streamlit as st
|
| 7 |
+
import tiktoken
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def analyze_code_files(code_files: list[str]) -> Iterable[dict[str, str]]:
    """Lazily analyze each selected file.

    Returns an iterator that calls ``analyze_code_file`` on one path at a
    time as it is consumed.
    """
    return map(analyze_code_file, code_files)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def analyze_code_file(code_file: str) -> dict[str, str]:
    """Analyze a code file and return its content with a recommendation.

    Args:
        code_file: Path of the file to read and analyze.

    Returns:
        A dict with the keys ``code_file`` (the path), ``code_snippet`` (the
        file content) and ``recommendation`` (the analysis text, a fixed
        message for an empty file, or an error description if the analysis
        raised).
    """
    # Decode explicitly as UTF-8 and substitute undecodable bytes so a single
    # oddly encoded file cannot abort the whole run with UnicodeDecodeError.
    with open(code_file, "r", encoding="utf-8", errors="replace") as f:
        code_content = f.read()

    if not code_content:
        return {
            "code_file": code_file,
            "code_snippet": code_content,
            "recommendation": "No code found in file",
        }

    try:
        logging.info("Analyzing code file: %s", code_file)
        analysis = get_code_analysis(code_content)
    except Exception as e:
        # Deliberate best-effort boundary: report the failure for this file
        # and let the remaining files proceed. Log with traceback.
        logging.exception("Error analyzing code file: %s", code_file)
        analysis = f"Error analyzing code file: {e}"

    return {
        "code_file": code_file,
        "code_snippet": code_content,
        "recommendation": analysis,
    }
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def get_num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301"):
    """Count the tokens a list of chat messages occupies for ``model``.

    The unversioned names "gpt-3.5-turbo" and "gpt-4" are counted as
    "gpt-3.5-turbo-0301" and "gpt-4-0314" respectively. Any other model
    raises ``NotImplementedError``.
    """
    # Source: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        logging.debug("Model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")

    if model == "gpt-3.5-turbo":
        logging.debug(
            "gpt-3.5-turbo may change over time. Returning num tokens assuming gpt-3.5-turbo-0301."
        )
        return get_num_tokens_from_messages(
            messages, model="gpt-3.5-turbo-0301"
        )
    if model == "gpt-4":
        logging.debug(
            "gpt-4 may change over time. Returning num tokens assuming gpt-4-0314."
        )
        return get_num_tokens_from_messages(messages, model="gpt-4-0314")

    # (tokens added per message, tokens added when a "name" key is present)
    overheads = {
        # every message follows <|start|>{role/name}\n{content}<|end|>\n;
        # if there's a name, the role is omitted
        "gpt-3.5-turbo-0301": (4, -1),
        "gpt-4-0314": (3, 1),
    }
    if model not in overheads:
        raise NotImplementedError(
            f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
        )
    per_message, per_name = overheads[model]

    # every reply is primed with <|start|>assistant<|message|>
    total = 3
    for message in messages:
        total += per_message
        total += sum(len(encoding.encode(value)) for value in message.values())
        if "name" in message:
            total += per_name
    return total
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
@st.cache_data(show_spinner=False)
def get_code_analysis(code: str) -> str:
    """Get code analysis from the OpenAI API.

    Args:
        code: Source text to review.

    Returns:
        The model's review with surrounding whitespace removed, or a fixed
        message when fewer than 200 tokens would remain for the response.
    """
    # Dedent the template BEFORE inserting the code. Dedenting afterwards
    # lets the inserted file's own indentation decide whether (and how far)
    # the instructions are dedented.
    template = dedent(
        """\
        Please review the code below and identify any syntax or logical errors, suggest
        ways to refactor and improve code quality, enhance performance, address security
        concerns, and align with best practices. Provide specific examples for each area
        and limit your recommendations to three per category.

        Use the following response format, keeping the section headings as-is, and provide
        your feedback. Use bullet points for each response. The provided examples are for
        illustration purposes only and should not be repeated.

        **Syntax and logical errors (example)**:
        - Incorrect indentation on line 12
        - Missing closing parenthesis on line 23

        **Code refactoring and quality (example)**:
        - Replace multiple if-else statements with a switch case for readability
        - Extract repetitive code into separate functions

        **Performance optimization (example)**:
        - Use a more efficient sorting algorithm to reduce time complexity
        - Cache results of expensive operations for reuse

        **Security vulnerabilities (example)**:
        - Sanitize user input to prevent SQL injection attacks
        - Use prepared statements for database queries

        **Best practices (example)**:
        - Add meaningful comments and documentation to explain the code
        - Follow consistent naming conventions for variables and functions

        Code:
        ```
        {code}
        ```

        Your review:"""
    )
    # str.format only parses the template, so braces inside ``code`` are safe.
    prompt = template.format(code=code)
    messages = [{"role": "system", "content": prompt}]
    tokens_in_messages = get_num_tokens_from_messages(
        messages=messages, model="gpt-3.5-turbo"
    )
    # Token budget shared by prompt and response.
    max_tokens = 4096
    tokens_for_response = max_tokens - tokens_in_messages

    if tokens_for_response < 200:
        return "The code file is too long to analyze. Please select a shorter file."

    logging.info("Sending request to OpenAI API for code analysis")
    logging.info("Max response tokens: %d", tokens_for_response)
    # openai>=1.0 (requirements.txt pins 1.26.0) removed
    # ``openai.ChatCompletion``; the module-level client exposes the same
    # call as ``openai.chat.completions.create``.
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        max_tokens=tokens_for_response,
        n=1,
        temperature=0,
    )
    logging.info("Received response from OpenAI API")

    # Get the assistant's response from the API response; the message is an
    # object in the 1.x client and its content may be None.
    assistant_response = response.choices[0].message.content or ""

    return assistant_response.strip()
|
repo.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import Iterable
|
| 3 |
+
|
| 4 |
+
import streamlit as st
|
| 5 |
+
from git import Repo
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def list_code_files_in_repository(
    repo_url: str, extensions: list[str]
) -> Iterable[str]:
    """Clone ``repo_url`` and list its files that end with one of ``extensions``."""
    return get_all_files_in_directory(
        clone_github_repository(repo_url), extensions
    )
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@st.cache_data(show_spinner=False)
def clone_github_repository(repo_url: str) -> str:
    """Clone a GitHub repository and return the local path.

    The clone is placed in a directory (relative to the current working
    directory) named after the last path segment of ``repo_url``. An existing
    directory of that name is reused without cloning again.

    Raises:
        ValueError: If no usable directory name can be derived from the URL.
    """
    # Ignore surrounding whitespace and trailing slashes; otherwise
    # "https://host/owner/repo/" would yield an empty directory name.
    local_path = repo_url.strip().rstrip("/").split("/")[-1]

    # "." and ".." already exist, so they would be returned as-is and the
    # caller would list files outside of any clone.
    if local_path in ("", ".", ".."):
        raise ValueError(f"Cannot derive a repository name from URL: {repo_url!r}")

    # NOTE(review): repositories with the same name under different owners
    # map to the same directory - confirm whether that is acceptable.
    if not os.path.exists(local_path):
        Repo.clone_from(repo_url, local_path)

    return local_path
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def get_all_files_in_directory(path: str, extensions: list[str]) -> list[str]:
    """Walk ``path`` recursively and collect files whose name ends with any of ``extensions``.

    Each returned entry is the directory joined with the file name.
    """
    suffixes = tuple(extensions)
    return [
        os.path.join(directory, name)
        for directory, _, names in os.walk(path)
        for name in names
        if name.endswith(suffixes)
    ]
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def create_file_tree(code_files: Iterable[str]) -> list[dict[str, str]]:
    """Turn file paths into a nested list of tree nodes.

    Paths are processed in sorted order and split on ``os.sep``. Every node
    is a dict with ``label`` (the path segment) and ``value`` (the path up to
    and including that segment); nodes that have descendants also carry a
    ``children`` list.
    """
    root = []
    for path in sorted(code_files):
        segments = path.split(os.sep)
        last_index = len(segments) - 1
        siblings = root
        for depth, segment in enumerate(segments):
            node = next(
                (candidate for candidate in siblings if candidate["label"] == segment),
                None,
            )
            if node is not None:
                # Descend into the node that already represents this segment.
                siblings = node.setdefault("children", [])
                continue
            node = {
                "label": segment,
                "value": os.sep.join(segments[: depth + 1]),
            }
            siblings.append(node)
            if depth != last_index:
                siblings = node.setdefault("children", [])
    return root
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
GitPython==3.1.43
|
| 2 |
+
openai==1.26.0
|
| 3 |
+
streamlit==1.34.0
|
| 4 |
+
streamlit_tree_select==0.0.5
|
| 5 |
+
tiktoken==0.6.0
|
utils.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
# File extension (with leading dot, lower case) -> language name.
# Insertion order is kept because the keys are iterated elsewhere.
EXTENSION_TO_LANGUAGE_MAP = {
    ".py": "python",
    ".js": "javascript",
    ".java": "java",
    ".cpp": "cpp",
    ".c": "c",
    ".rb": "ruby",
    ".php": "php",
    ".cs": "csharp",
    ".go": "go",
    ".swift": "swift",
    ".ts": "typescript",
    ".rs": "rust",
    ".kt": "kotlin",
    ".m": "objective-c",
}
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def load_environment_variables(file_path: str) -> None:
    """Load ``KEY=VALUE`` lines from ``file_path`` into ``os.environ``.

    A missing file is ignored. Blank lines, lines starting with ``#`` and
    lines without ``=`` or without a key are skipped. Only the first ``=``
    separates key from value, so values may themselves contain ``=``.
    Whitespace around key and value is removed.
    """
    if not os.path.exists(file_path):
        return
    with open(file_path, encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            # partition() never raises, unlike unpacking split("="), which
            # fails on zero or on more than one "=".
            key, separator, value = line.partition("=")
            key = key.strip()
            if not separator or not key:
                continue
            os.environ[key] = value.strip()
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def set_environment_variables() -> None:
    """Set the fixed environment variables the app relies on."""
    os.environ.update({"TOKENIZERS_PARALLELISM": "false"})
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def configure_logging(log_file: str, level: int = logging.INFO) -> None:
    """Configure the root logger to write to ``log_file`` at ``level``."""
    settings = {"filename": log_file, "level": level}
    logging.basicConfig(**settings)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class TempDirContext:
    """Context manager that runs its body with ``temp_dir`` as working directory.

    On entry the directory is created if necessary and made the current
    working directory; on exit the directory that was current at entry is
    restored.
    """

    def __init__(self, temp_dir: str) -> None:
        # Initial value only; refreshed on every __enter__.
        self.cwd = os.getcwd()
        self.temp_dir = temp_dir

    def __enter__(self):
        # Capture the directory at entry rather than at construction, so an
        # instance created in one directory and entered in another (or
        # reused) restores the right place.
        self.cwd = os.getcwd()
        os.makedirs(self.temp_dir, exist_ok=True)
        os.chdir(self.temp_dir)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None, so exceptions raised in the body propagate.
        os.chdir(self.cwd)
|