irshadtech10 committed on
Commit
2f386cc
·
verified ·
1 Parent(s): 746f3f1

Upload 11 files

Browse files
Files changed (11) hide show
  1. __init__.py +0 -0
  2. about.py +48 -0
  3. app.py +100 -0
  4. display.py +53 -0
  5. download.py +20 -0
  6. forms.py +86 -0
  7. main.py +6 -0
  8. query.py +149 -0
  9. repo.py +57 -0
  10. requirements.txt +5 -0
  11. utils.py +49 -0
__init__.py ADDED
File without changes
about.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Markdown shown in the app's "About ChatGPT Code Review" expander. It embeds
# an HTML comment and a <style> block, so it only renders as intended when the
# caller passes unsafe_allow_html=True to st.markdown.
about_section = """
## About ChatGPT Code Review

ChatGPT Code Review is an app designed to help software developers improve
their code quality by leveraging the power of OpenAI's large language models.
The app analyzes the code in a given GitHub repository and provides
recommendations to enhance the code. It is a valuable tool for developers,
allowing them to discover potential issues in their codebase.

To use ChatGPT Code Review and get recommendations for your code, follow these
steps:

1. **Access the app**: Open the ChatGPT Code Review app in your web browser.
2. **Enter the GitHub repository URL**: In the input field labeled "GitHub
Repository URL", enter the URL of the repository you'd like to analyze.
3. **Enter your OpenAI API Key**: In the input field labeled "OpenAI API Key",
enter your OpenAI API key. If you don't have one, you can obtain it from the
[OpenAI platform](https://platform.openai.com/account/api-keys).
4. **Select file extensions**: Choose the file extensions you want to analyze
or add additional extensions in the provided input field.
5. **Clone the repository**: Click the "Clone Repository" button. The app will
display the files available for analysis in a tree structure.
6. **Select files to analyze**: Check the boxes next to the files you want to
analyze, then click the "Analyze Files" button.
7. **Review the recommendations**: The recommendations will be displayed in a
clear and structured format, with code snippets and suggested improvements.
Review these recommendations to identify potential areas for improvement in
your code.

Keep in mind that ChatGPT Code Review is an AI-powered tool, and while it can
provide helpful insights, it may not always be perfect. It is essential to use
your judgment and expertise when assessing the recommendations provided by the
app.

<!--don't underline links -->
<style>
a {
text-decoration: none;
}
</style>

### Links

- [OpenAI](https://openai.com/)
- [API Keys](https://platform.openai.com/account/api-keys)
- [GitHub](https://github.com/domvwt/chatgpt-code-review)

"""
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import about
4
+ import display
5
+ import download
6
+ import forms
7
+ import query
8
+ import repo
9
+ import streamlit as st
10
+ import utils
11
+
12
# File of KEY=VALUE pairs that app() hands to utils.load_environment_variables.
env_file_path = ".env"
# File name that app() hands to utils.configure_logging.
log_file = "app.log"


# Directory that app() works in for the duration of a run (via
# utils.TempDirContext).
temp_dir = "/tmp/chatgpt-code-review"
17
+
18
+
19
def app():
    """Render the ChatGPT Code Review page.

    The function loads environment/logging configuration and then, while the
    working directory is switched to ``temp_dir``:

    1. shows the "About" expander and the repository form;
    2. stops early when a clone is requested without an API key;
    3. lists the repository's code files and shows the file-selection form;
    4. analyzes the selected files and renders one section per file;
    5. offers the collected recommendations as a Markdown download.
    """
    utils.load_environment_variables(env_file_path)
    utils.set_environment_variables()
    utils.configure_logging(log_file)

    with utils.TempDirContext(temp_dir):
        st.set_page_config(
            page_title="ChatGPT Code Review",
        )

        session_state = st.session_state

        st.title("ChatGPT Code Review :rocket:")

        with st.expander("About ChatGPT Code Review"):
            st.markdown(about.about_section, unsafe_allow_html=True)
            st.write("")

        default_repo_url = "https://github.com/domvwt/chatgpt-code-review"
        repo_form = forms.RepoForm(default_repo_url)
        with st.form("repo_url_form"):
            repo_form.display_form()

        # Check if the API key is valid before proceeding
        if repo_form.clone_repo_button and not repo_form.is_api_key_valid():
            st.stop()

        repo_url, extensions = repo_form.get_form_data()

        analyze_files_form = forms.AnalyzeFilesForm(session_state)
        with st.form("analyze_files_form"):
            if repo_form.clone_repo_button:
                # An explicit clone request always refreshes the listing.
                # Previously a listing already held in the session was reused,
                # so cloning a second repository (or changing the extensions)
                # silently kept showing the first result.
                session_state.code_files = repo.list_code_files_in_repository(
                    repo_url, extensions
                )
                # The old selection and the sticky "analyze" flag belong to
                # the previous listing; reset them to their initial values.
                session_state.selected_files = None
                session_state.analyze_files = False

            if repo_form.clone_repo_button or session_state.get("code_files"):
                analyze_files_form.display_form()

        # Analyze the selected files
        with st.spinner("Analyzing files..."):
            if session_state.get("analyze_files"):
                if session_state.get("selected_files"):
                    recommendations = query.analyze_code_files(
                        session_state.selected_files
                    )

                    # Display the recommendations. `recommendations` is
                    # consumed lazily so each file is rendered as soon as its
                    # analysis is available.
                    st.header("Recommendations")
                    recommendation_list = []
                    for index, rec in enumerate(recommendations):
                        if index:
                            # Separator between consecutive files.
                            st.write("---")
                        st.subheader(display.escape_markdown(rec["code_file"]))
                        recommendation = (
                            rec["recommendation"] or "No recommendations"
                        )
                        st.markdown(recommendation)
                        with st.expander("View Code"):
                            extension = os.path.splitext(rec["code_file"])[1]
                            display.display_code(
                                rec["code_snippet"], extension
                            )
                        recommendation_list.append(rec)
                    if recommendation_list:
                        # Kept in the session so the download button below
                        # still works on later reruns.
                        session_state.recommendation_list = recommendation_list
                else:
                    st.error("Please select at least one file to analyze.")
                    st.stop()

        st.write("")

        download.download_markdown(session_state.get("recommendation_list"))
97
+
98
+
99
+ if __name__ == "__main__":
100
+ app()
display.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import Optional
3
+
4
+ import streamlit as st
5
+ from utils import EXTENSION_TO_LANGUAGE_MAP
6
+
7
+
8
def extension_to_language(file_extension: str) -> Optional[str]:
    """Look up the language registered for a file extension.

    The extension is lower-cased before it is looked up in
    ``EXTENSION_TO_LANGUAGE_MAP``; ``None`` is returned when it has no entry.
    """
    normalized = file_extension.lower()
    if normalized in EXTENSION_TO_LANGUAGE_MAP:
        return EXTENSION_TO_LANGUAGE_MAP[normalized]
    return None
11
+
12
+
13
def display_code(code: str, extension: str) -> None:
    """Render ``code`` as a fenced Markdown code block.

    Args:
        code: Source text to show.
        extension: File extension (for example ``".py"``) used to pick the
            language tag of the fence.
    """
    # An unknown extension yields None; use an untagged fence instead of
    # emitting the literal text "None" as the language.
    language = extension_to_language(extension) or ""

    # The fence must be longer than any run of backticks inside the code,
    # otherwise a "```" in the file would terminate the block early.
    longest_run = max((len(run) for run in re.findall(r"`+", code)), default=0)
    fence = "`" * max(3, longest_run + 1)

    markdown_code = f"{fence}{language}\n{code}\n{fence}"
    st.markdown(markdown_code)
18
+
19
+
20
def escape_markdown(text: str) -> str:
    """Return ``text`` with Markdown punctuation backslash-escaped.

    Each of the characters ``\\ ` * _ { } [ ] ( ) # + - . !`` is prefixed
    with a single backslash; every other character is left untouched.
    """
    special_characters = "\\`*_{}[]()#+-.!"
    # Map every special code point to "backslash + itself" and apply the
    # whole table in one pass.
    escape_table = {ord(char): "\\" + char for char in special_characters}
    return text.translate(escape_table)
41
+
42
+
43
def generate_markdown(recommendations):
    """Build the Markdown report for a sequence of recommendation dicts.

    Each item must provide ``"code_file"`` and ``"recommendation"``; a falsy
    recommendation is reported as "No recommendations". The report starts
    with a top-level title followed by one ``##`` section per file.
    """
    sections = ["# ChatGPT Code Review Recommendations\n\n"]

    for entry in recommendations:
        heading = entry["code_file"]
        body = entry["recommendation"] or "No recommendations"
        sections.append(f"## {heading}\n\n")
        sections.append(f"{body}\n\n")

    return "".join(sections)
download.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import display
2
+ import streamlit as st
3
+
4
+
5
def download_markdown(recommendations):
    """Show the "Download Markdown" button.

    With recommendations the button serves the report produced by
    ``display.generate_markdown``; without any it is rendered disabled with
    empty data.
    """
    has_recommendations = bool(recommendations)
    payload = (
        display.generate_markdown(recommendations)
        if has_recommendations
        else ""
    )
    st.download_button(
        "Download Markdown",
        data=payload,
        file_name="chatgpt_recommendations.md",
        mime="text/markdown",
        disabled=not has_recommendations,
    )
forms.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+
4
+ import openai
5
+ import repo
6
+ import streamlit as st
7
+ from streamlit_tree_select import tree_select
8
+ from utils import EXTENSION_TO_LANGUAGE_MAP
9
+
10
+
11
class RepoForm:
    """A class to encapsulate the repository form and its operations."""

    # Extensions offered (and pre-selected) in the multiselect widget.
    options = EXTENSION_TO_LANGUAGE_MAP.keys()

    def __init__(self, default_repo_url: str):
        """Create the form with ``default_repo_url`` pre-filled in the URL box."""
        self.default_repo_url = default_repo_url
        self.repo_url = ""
        self.api_key = ""
        self.extensions = []
        self.additional_extensions = ""
        # Defined up front so the attribute can be read safely even before
        # display_form() has run.
        self.clone_repo_button = False

    def display_form(self):
        """Displays the repository form and its elements.

        Must be called inside an ``st.form`` block because it renders the
        form's submit button. The entered values are stored on the instance
        and the API key is also assigned to ``openai.api_key``.
        """
        self.repo_url = st.text_input(
            "GitHub Repository URL:", self.default_repo_url
        )

        env_api_key = os.getenv("OPENAI_API_KEY", "")
        # type="password" masks the key on screen. This matters most when the
        # field is pre-filled from the server's environment: a plain text box
        # would show that key to every visitor.
        self.api_key = st.text_input(
            "OpenAI API Key:",
            env_api_key,
            type="password",
            placeholder="Paste your API key here",
        ).strip()
        openai.api_key = self.api_key

        self.extensions = st.multiselect(
            "File extensions to analyze",
            options=self.options,
            default=self.options,
        )
        self.additional_extensions = st.text_input(
            "Additional file extensions to analyze (comma-separated):"
        )
        if self.additional_extensions:
            candidates = (
                ext.strip() for ext in self.additional_extensions.split(",")
            )
            # Drop empty entries (e.g. from a trailing comma): an empty
            # suffix matches every file name in an endswith() check.
            self.extensions.extend(ext for ext in candidates if ext)

        self.clone_repo_button = st.form_submit_button("Clone Repository")

    def get_form_data(self):
        """Returns the data captured by the repository form.

        Returns:
            A ``(repo_url, extensions)`` tuple.
        """
        return (
            self.repo_url,
            self.extensions,
        )

    def is_api_key_valid(self):
        """Report whether an API key was entered.

        Only presence is checked; the key is not verified against the OpenAI
        API. When it is missing an error message is shown in the page.

        Returns:
            ``True`` if a non-empty key is set, ``False`` otherwise.
        """
        if not self.api_key:
            st.error("Please enter your OpenAI API key.")
            return False
        return True
65
+
66
+
67
class AnalyzeFilesForm:
    """Form that lets the user pick files from the tree and start the analysis."""

    def __init__(self, session_state):
        """Keep a reference to the session state the form reads and writes."""
        self.session_state = session_state

    def display_form(self):
        """Render the file tree and the "Analyze Files" submit button.

        Reads ``code_files`` from the session state and writes back
        ``selected_files`` (the checked tree entries) and ``analyze_files``
        (true once the button has been pressed, and kept true afterwards).
        """
        state = self.session_state

        st.write("Select files to analyze:")
        tree_nodes = repo.create_file_tree(state.code_files)
        selection = tree_select(
            tree_nodes,
            show_expand_all=True,
            check_model="leaf",
            checked=state.get("selected_files"),
        )
        state.selected_files = selection["checked"]
        logging.info("Selected files: %s", state.selected_files)

        submitted = st.form_submit_button("Analyze Files")
        state.analyze_files = submitted or state.get("analyze_files")
main.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # NOTE: Keeping this so we don't have to redeploy the Streamlit app
2
+ from app import app
3
+
4
+
5
+ if __name__ == "__main__":
6
+ app()
query.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from textwrap import dedent
3
+ from typing import Iterable
4
+
5
+ import openai
6
+ import streamlit as st
7
+ import tiktoken
8
+
9
+
10
def analyze_code_files(code_files: list[str]) -> Iterable[dict[str, str]]:
    """Lazily analyze each selected file.

    Returns a generator that yields one ``analyze_code_file`` result per
    path, in order; a file is only analyzed when its result is requested.
    """
    # Obtain the iterator eagerly so a non-iterable argument fails right
    # here, while the per-file work stays deferred.
    pending_paths = iter(code_files)
    return (analyze_code_file(path) for path in pending_paths)
13
+
14
+
15
def analyze_code_file(code_file: str) -> dict[str, str]:
    """Analyze a code file and return a dictionary with file information and recommendations.

    Args:
        code_file: Path of the file to review.

    Returns:
        A dict with the keys ``"code_file"`` (the given path),
        ``"code_snippet"`` (the file's text, empty if it could not be read)
        and ``"recommendation"`` (the review, or a message describing why no
        review is available).
    """
    try:
        # Read as UTF-8 explicitly instead of relying on the platform default.
        with open(code_file, "r", encoding="utf-8") as f:
            code_content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        # One unreadable or non-text file must not abort the whole run;
        # report it like any other per-file failure.
        logging.exception("Error reading code file: %s", code_file)
        return {
            "code_file": code_file,
            "code_snippet": "",
            "recommendation": f"Error reading code file: {e}",
        }

    if not code_content:
        return {
            "code_file": code_file,
            "code_snippet": code_content,
            "recommendation": "No code found in file",
        }

    try:
        logging.info("Analyzing code file: %s", code_file)
        analysis = get_code_analysis(code_content)
    except Exception as e:
        # Boundary handler: any failure of the analysis call is turned into
        # the recommendation text so the remaining files are still processed.
        # logging.exception records the traceback in the log file.
        logging.exception("Error analyzing code file: %s", code_file)
        analysis = f"Error analyzing code file: {e}"

    return {
        "code_file": code_file,
        "code_snippet": code_content,
        "recommendation": analysis,
    }
39
+
40
+
41
def get_num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301"):
    """Count the tokens a list of chat messages occupies for ``model``.

    The unpinned names ``gpt-3.5-turbo`` and ``gpt-4`` are counted as
    ``gpt-3.5-turbo-0301`` and ``gpt-4-0314`` respectively. Any other model
    without a known per-message overhead raises ``NotImplementedError``.
    """
    # Source: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        logging.debug("Model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")

    # Unpinned model names delegate to a fixed snapshot.
    if model == "gpt-3.5-turbo":
        logging.debug(
            "gpt-3.5-turbo may change over time. Returning num tokens assuming gpt-3.5-turbo-0301."
        )
        return get_num_tokens_from_messages(
            messages, model="gpt-3.5-turbo-0301"
        )
    if model == "gpt-4":
        logging.debug(
            "gpt-4 may change over time. Returning num tokens assuming gpt-4-0314."
        )
        return get_num_tokens_from_messages(messages, model="gpt-4-0314")

    # model -> (tokens_per_message, tokens_per_name)
    overhead_by_model = {
        # every message follows <|start|>{role/name}\n{content}<|end|>\n;
        # if there's a name, the role is omitted
        "gpt-3.5-turbo-0301": (4, -1),
        "gpt-4-0314": (3, 1),
    }
    if model not in overhead_by_model:
        raise NotImplementedError(
            f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
        )
    tokens_per_message, tokens_per_name = overhead_by_model[model]

    total = 0
    for message in messages:
        total += tokens_per_message
        for field, text in message.items():
            total += len(encoding.encode(text))
            if field == "name":
                total += tokens_per_name
    # every reply is primed with <|start|>assistant<|message|>
    return total + 3
82
+
83
+
84
@st.cache_data(show_spinner=False)
def get_code_analysis(code: str) -> str:
    """Get code analysis from the OpenAI API.

    Args:
        code: Source text to review.

    Returns:
        The model's review with surrounding whitespace removed, or a fixed
        message when the prompt leaves fewer than 200 tokens for the answer.
    """
    # Dedent the template BEFORE inserting the code. Dedenting afterwards is
    # normally a no-op, because the inserted code has lines starting in
    # column 0, so the whole prompt would keep its source-file indentation.
    # str.format only parses the template, so braces in `code` are safe.
    template = dedent(
        """\
        Please review the code below and identify any syntax or logical errors, suggest
        ways to refactor and improve code quality, enhance performance, address security
        concerns, and align with best practices. Provide specific examples for each area
        and limit your recommendations to three per category.

        Use the following response format, keeping the section headings as-is, and provide
        your feedback. Use bullet points for each response. The provided examples are for
        illustration purposes only and should not be repeated.

        **Syntax and logical errors (example)**:
        - Incorrect indentation on line 12
        - Missing closing parenthesis on line 23

        **Code refactoring and quality (example)**:
        - Replace multiple if-else statements with a switch case for readability
        - Extract repetitive code into separate functions

        **Performance optimization (example)**:
        - Use a more efficient sorting algorithm to reduce time complexity
        - Cache results of expensive operations for reuse

        **Security vulnerabilities (example)**:
        - Sanitize user input to prevent SQL injection attacks
        - Use prepared statements for database queries

        **Best practices (example)**:
        - Add meaningful comments and documentation to explain the code
        - Follow consistent naming conventions for variables and functions

        Code:
        ```
        {code}
        ```

        Your review:"""
    )
    prompt = template.format(code=code)

    messages = [{"role": "system", "content": prompt}]
    tokens_in_messages = get_num_tokens_from_messages(
        messages=messages, model="gpt-3.5-turbo"
    )
    # Budget shared by prompt and answer; whatever the prompt does not use
    # is requested for the response.
    max_tokens = 4096
    tokens_for_response = max_tokens - tokens_in_messages

    if tokens_for_response < 200:
        return "The code file is too long to analyze. Please select a shorter file."

    logging.info("Sending request to OpenAI API for code analysis")
    logging.info("Max response tokens: %d", tokens_for_response)
    # openai>=1.0 (the project pins 1.26.0) removed openai.ChatCompletion;
    # the module-level client is reached through openai.chat.completions and
    # still uses the key assigned to openai.api_key.
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        max_tokens=tokens_for_response,
        n=1,
        temperature=0,
    )
    logging.info("Received response from OpenAI API")

    # Get the assistant's response from the API response. In openai>=1.0 the
    # message is an object (not subscriptable) and its content may be None.
    assistant_response = response.choices[0].message.content or ""

    return assistant_response.strip()
repo.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Iterable
3
+
4
+ import streamlit as st
5
+ from git import Repo
6
+
7
+
8
def list_code_files_in_repository(
    repo_url: str, extensions: list[str]
) -> Iterable[str]:
    """Clone ``repo_url`` and list its files that end with one of ``extensions``.

    The clone location comes from ``clone_github_repository``; the matching
    is done by ``get_all_files_in_directory``.
    """
    return get_all_files_in_directory(
        clone_github_repository(repo_url), extensions
    )
14
+
15
+
16
@st.cache_data(show_spinner=False)
def clone_github_repository(repo_url: str) -> str:
    """Clone a GitHub repository and return the local path.

    The clone is placed in a directory (relative to the current working
    directory) named after the last path segment of the URL, without a
    trailing ``.git``. An existing directory of that name is reused as-is.

    Args:
        repo_url: URL of the repository to clone.

    Returns:
        The relative path of the local clone.

    Raises:
        ValueError: If no usable directory name can be derived from the URL.
    """
    # A trailing slash would otherwise leave an empty last segment.
    cleaned_url = repo_url.strip().rstrip("/")
    local_path = cleaned_url.split("/")[-1]
    if local_path.endswith(".git"):
        # ".../repo" and ".../repo.git" name the same repository.
        local_path = local_path[: -len(".git")]

    # "" cannot be cloned into, and "." / ".." would make the caller walk
    # the working directory or its parent instead of a clone.
    if local_path in ("", ".", ".."):
        raise ValueError(
            f"Cannot determine a repository name from URL: {repo_url!r}"
        )

    # NOTE(review): only the last URL segment names the directory, so two
    # different repositories with the same name share one clone.
    if not os.path.exists(local_path):
        Repo.clone_from(cleaned_url, local_path)

    return local_path
25
+
26
+
27
def get_all_files_in_directory(path: str, extensions: list[str]) -> list[str]:
    """Collect every file below ``path`` whose name ends with one of ``extensions``.

    The directory tree is walked recursively; each result is the walked
    directory joined with the file name.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(path)
        for name in names
        if any(name.endswith(suffix) for suffix in extensions)
    ]
35
+
36
+
37
def create_file_tree(code_files: Iterable[str]) -> list[dict[str, str]]:
    """Turn a collection of file paths into a nested list of tree nodes.

    Paths are processed in sorted order and split on ``os.sep``. Every node
    is a dict with ``"label"`` (the path segment) and ``"value"`` (the path
    up to and including that segment); directory nodes additionally carry a
    ``"children"`` list.
    """
    roots = []
    for path in sorted(code_files):
        segments = path.split(os.sep)
        last_index = len(segments) - 1
        siblings = roots
        for depth, segment in enumerate(segments):
            match = next(
                (node for node in siblings if node["label"] == segment), None
            )
            if match is not None:
                # Segment already present at this level: descend into it.
                siblings = match.setdefault("children", [])
                continue

            node = {
                "label": segment,
                "value": os.sep.join(segments[: depth + 1]),
            }
            siblings.append(node)
            if depth != last_index:
                # Only intermediate (directory) segments get children.
                siblings = node.setdefault("children", [])
    return roots
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ GitPython==3.1.43
2
+ openai==1.26.0
3
+ streamlit==1.34.0
4
+ streamlit_tree_select==0.0.5
5
+ tiktoken==0.6.0
utils.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+
4
# Maps a lower-case file extension (including the leading dot) to a language
# name. display.py uses the value as the tag of a Markdown code fence and
# forms.py offers the keys as the selectable extensions.
EXTENSION_TO_LANGUAGE_MAP = {
    ".py": "python",
    ".js": "javascript",
    ".java": "java",
    ".cpp": "cpp",
    ".c": "c",
    ".rb": "ruby",
    ".php": "php",
    ".cs": "csharp",
    ".go": "go",
    ".swift": "swift",
    ".ts": "typescript",
    ".rs": "rust",
    ".kt": "kotlin",
    ".m": "objective-c",
}
20
+
21
+
22
def load_environment_variables(file_path: str) -> None:
    """Load ``KEY=VALUE`` pairs from a file into ``os.environ``.

    Blank lines and lines starting with ``#`` are ignored. Only the first
    ``=`` separates key and value, so values may themselves contain ``=``.
    Whitespace around key and value is removed. Lines without a key or
    without ``=`` are skipped with a warning. A missing file is not an error.

    Args:
        file_path: Path of the environment file.
    """
    if not os.path.exists(file_path):
        return

    with open(file_path, encoding="utf-8") as f:
        for line_number, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue

            key, separator, value = line.partition("=")
            key = key.strip()
            if not separator or not key:
                # Log only the position: the line itself may hold a secret.
                logging.warning(
                    "Ignoring malformed line %d in %s", line_number, file_path
                )
                continue

            os.environ[key] = value.strip()
29
+
30
+
31
def set_environment_variables() -> None:
    """Set the environment variables the app always needs.

    Currently this is ``TOKENIZERS_PARALLELISM``, which is set to ``"false"``.
    """
    os.environ.update({"TOKENIZERS_PARALLELISM": "false"})
33
+
34
+
35
def configure_logging(log_file: str, level: int = logging.INFO) -> None:
    """Configure the root logger through ``logging.basicConfig``.

    Args:
        log_file: File name passed as ``filename``.
        level: Logging level passed as ``level`` (default ``logging.INFO``).
    """
    settings = {"filename": log_file, "level": level}
    logging.basicConfig(**settings)
37
+
38
+
39
class TempDirContext:
    """Context manager that runs its body with ``temp_dir`` as working directory.

    On entry the directory is created if needed and made the current working
    directory; on exit the directory that was current at entry is restored.
    The directory itself is not removed.

    Note: ``os.chdir`` changes the working directory of the whole process,
    not only of the calling thread.
    """

    def __init__(self, temp_dir: str) -> None:
        """Remember the target directory; nothing is changed until entry."""
        # Also recorded here so the attribute exists before entry; it is
        # refreshed in __enter__.
        self.cwd = os.getcwd()
        self.temp_dir = temp_dir

    def __enter__(self):
        """Switch into ``temp_dir`` and return this context manager."""
        # Capture the directory at entry time, so the right one is restored
        # even if the working directory changed after construction.
        self.cwd = os.getcwd()
        os.makedirs(self.temp_dir, exist_ok=True)
        os.chdir(self.temp_dir)
        # Returning self makes `with TempDirContext(...) as ctx` usable.
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Restore the previous working directory; exceptions propagate."""
        os.chdir(self.cwd)