Spaces:

irshadtech10
/

Repo_Analyzer

Sleeping

App Files Files Community

irshadtech10 committed on May 7, 2024

Commit

2f386cc

verified ·

1 Parent(s): 746f3f1

Upload 11 files

Browse files

Files changed (11) hide show

__init__.py +0 -0
about.py +48 -0
app.py +100 -0
display.py +53 -0
download.py +20 -0
forms.py +86 -0
main.py +6 -0
query.py +149 -0
repo.py +57 -0
requirements.txt +5 -0
utils.py +49 -0

__init__.py ADDED Viewed

File without changes

about.py ADDED Viewed

	@@ -0,0 +1,48 @@

+# Markdown (with a small embedded <style> block) for the app's "About" panel.
+# The text is rendered verbatim, so edits here change what users see.
+about_section = """
+## About ChatGPT Code Review
+ChatGPT Code Review is an app designed to help software developers improve
+their code quality by leveraging the power of OpenAI's large language models.
+The app analyzes the code in a given GitHub repository and provides
+recommendations to enhance the code. It is a valuable tool for developers,
+allowing them to discover potential issues in their codebase.
+To use ChatGPT Code Review and get recommendations for your code, follow these
+steps:
+1. **Access the app**: Open the ChatGPT Code Review app in your web browser.
+2. **Enter the GitHub repository URL**: In the input field labeled "GitHub
+   Repository URL", enter the URL of the repository you'd like to analyze.
+3. **Enter your OpenAI API Key**: In the input field labeled "OpenAI API Key",
+   enter your OpenAI API key. If you don't have one, you can obtain it from the
+   [OpenAI platform](https://platform.openai.com/account/api-keys).
+4. **Select file extensions**: Choose the file extensions you want to analyze
+   or add additional extensions in the provided input field.
+5. **Clone the repository**: Click the "Clone Repository" button. The app will
+   display the files available for analysis in a tree structure.
+6. **Select files to analyze**: Check the boxes next to the files you want to
+   analyze, then click the "Analyze Files" button.
+7. **Review the recommendations**: The recommendations will be displayed in a
+   clear and structured format, with code snippets and suggested improvements.
+   Review these recommendations to identify potential areas for improvement in
+   your code.
+Keep in mind that ChatGPT Code Review is an AI-powered tool, and while it can
+provide helpful insights, it may not always be perfect. It is essential to use
+your judgment and expertise when assessing the recommendations provided by the
+app.
+<!--don't underline links -->
+<style>
+a {
+   text-decoration: none;
+}
+</style>
+### Links
+- [OpenAI](https://openai.com/)
+- [API Keys](https://platform.openai.com/account/api-keys)
+- [GitHub](https://github.com/domvwt/chatgpt-code-review)
+"""

app.py ADDED Viewed

	@@ -0,0 +1,100 @@

+import os
+import about
+import display
+import download
+import forms
+import query
+import repo
+import streamlit as st
+import utils
+# Path of the optional KEY=VALUE file handed to utils.load_environment_variables.
+env_file_path = ".env"
+# File name handed to utils.configure_logging.
+log_file = "app.log"
+# Directory that app() switches into (via utils.TempDirContext) while it runs.
+temp_dir = "/tmp/chatgpt-code-review"
+def app():
+    """Render the whole Streamlit page: repo form, file picker, analysis, download.
+    The function runs top to bottom on every Streamlit rerun; state that must
+    survive between reruns (code_files, selected_files, analyze_files,
+    recommendation_list) is kept in st.session_state.
+    """
+    utils.load_environment_variables(env_file_path)
+    utils.set_environment_variables()
+    utils.configure_logging(log_file)
+    # Everything below runs with temp_dir as the working directory.
+    with utils.TempDirContext(temp_dir):
+        st.set_page_config(
+            page_title="ChatGPT Code Review",
+        )
+        session_state = st.session_state
+        st.title("ChatGPT Code Review :rocket:")
+        with st.expander("About ChatGPT Code Review"):
+            # The about text embeds a <style> block, hence unsafe_allow_html.
+            st.markdown(about.about_section, unsafe_allow_html=True)
+            st.write("")
+        default_repo_url = "https://github.com/domvwt/chatgpt-code-review"
+        repo_form = forms.RepoForm(default_repo_url)
+        with st.form("repo_url_form"):
+            repo_form.display_form()
+        # Check if the API key is valid before proceeding
+        if repo_form.clone_repo_button and not repo_form.is_api_key_valid():
+            st.stop()
+        repo_url, extensions = repo_form.get_form_data()
+        analyze_files_form = forms.AnalyzeFilesForm(session_state)
+        with st.form("analyze_files_form"):
+            # Show the file picker right after a clone request, and on later
+            # reruns for as long as a file listing is stored in the session.
+            if repo_form.clone_repo_button or session_state.get("code_files"):
+                # Only list the repository when no listing is stored yet.
+                if not session_state.get("code_files"):
+                    session_state.code_files = (
+                        repo.list_code_files_in_repository(
+                            repo_url, extensions
+                        )
+                    )
+                analyze_files_form.display_form()
+        # Analyze the selected files
+        with st.spinner("Analyzing files..."):
+            if session_state.get("analyze_files"):
+                if session_state.get("selected_files"):
+                    recommendations = query.analyze_code_files(
+                        session_state.selected_files
+                    )
+                    # Display the recommendations
+                    st.header("Recommendations")
+                    # `first` suppresses the "---" separator before the first
+                    # result only.
+                    first = True
+                    recommendation_list = []
+                    for rec in recommendations:
+                        if not first:
+                            st.write("---")
+                        else:
+                            first = False
+                        st.subheader(display.escape_markdown(rec["code_file"]))
+                        recommendation = (
+                            rec["recommendation"] or "No recommendations"
+                        )
+                        st.markdown(recommendation)
+                        with st.expander("View Code"):
+                            # The file extension is used to pick the fence
+                            # language for the code display.
+                            extension = os.path.splitext(rec["code_file"])[1]
+                            display.display_code(
+                                rec["code_snippet"], extension
+                            )
+                        recommendation_list.append(rec)
+                    # Keep the results in the session so the download button
+                    # below can still offer them on later reruns.
+                    if recommendation_list:
+                        session_state.recommendation_list = recommendation_list
+                else:
+                    st.error("Please select at least one file to analyze.")
+                    st.stop()
+        st.write("")
+        download.download_markdown(session_state.get("recommendation_list"))
+# Build the page when this file is executed as the entry-point script.
+if __name__ == "__main__":
+    app()

display.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import re
+from typing import Optional
+import streamlit as st
+from utils import EXTENSION_TO_LANGUAGE_MAP
+def extension_to_language(file_extension: str) -> Optional[str]:
+    """Return the programming language corresponding to a given file extension."""
+    return EXTENSION_TO_LANGUAGE_MAP.get(file_extension.lower(), None)
+def display_code(code: str, extension: str) -> None:
+    """Display the code snippet in the specified language."""
+    language = extension_to_language(extension)
+    markdown_code = f"```{language}\n{code}\n```"
+    st.markdown(markdown_code)
+def escape_markdown(text: str) -> str:
+    """Escape markdown characters in a string."""
+    escape_chars = [
+        "\\",
+        "`",
+        "*",
+        "_",
+        "{",
+        "}",
+        "[",
+        "]",
+        "(",
+        ")",
+        "#",
+        "+",
+        "-",
+        ".",
+        "!",
+    ]
+    regex = re.compile("|".join(map(re.escape, escape_chars)))
+    return regex.sub(r"\\\g<0>", text)
+def generate_markdown(recommendations):
+    markdown_str = "# ChatGPT Code Review Recommendations\n\n"
+    for rec in recommendations:
+        code_file = rec["code_file"]
+        recommendation = rec["recommendation"] or "No recommendations"
+        markdown_str += f"## {code_file}\n\n"
+        markdown_str += f"{recommendation}\n\n"
+    return markdown_str

download.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import display
+import streamlit as st
+def download_markdown(recommendations):
+    if recommendations:
+        st.download_button(
+            "Download Markdown",
+            data=display.generate_markdown(recommendations),
+            file_name="chatgpt_recommendations.md",
+            mime="text/markdown",
+        )
+    else:
+        st.download_button(
+            "Download Markdown",
+            data="",
+            file_name="chatgpt_recommendations.md",
+            mime="text/markdown",
+            disabled=True,
+        )

forms.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import logging
+import os
+import openai
+import repo
+import streamlit as st
+from streamlit_tree_select import tree_select
+from utils import EXTENSION_TO_LANGUAGE_MAP
+class RepoForm:
+    """A class to encapsulate the repository form and its operations."""
+    options = EXTENSION_TO_LANGUAGE_MAP.keys()
+    def __init__(self, default_repo_url: str):
+        self.default_repo_url = default_repo_url
+        self.repo_url = ""
+        self.api_key = ""
+        self.extensions = []
+        self.additional_extensions = ""
+    def display_form(self):
+        """Displays the repository form and its elements."""
+        self.repo_url = st.text_input(
+            "GitHub Repository URL:", self.default_repo_url
+        )
+        env_api_key = os.getenv("OPENAI_API_KEY", "")
+        self.api_key = st.text_input(
+            "OpenAI API Key:",
+            env_api_key,
+            placeholder="Paste your API key here",
+        )
+        openai.api_key = self.api_key
+        self.extensions = st.multiselect(
+            "File extensions to analyze",
+            options=self.options,
+            default=self.options,
+        )
+        self.additional_extensions = st.text_input(
+            "Additional file extensions to analyze (comma-separated):"
+        )
+        if self.additional_extensions:
+            self.extensions.extend(
+                [ext.strip() for ext in self.additional_extensions.split(",")]
+            )
+        self.clone_repo_button = st.form_submit_button("Clone Repository")
+    def get_form_data(self):
+        """Returns the data captured by the repository form."""
+        return (
+            self.repo_url,
+            self.extensions,
+        )
+    def is_api_key_valid(self):
+        """Checks if the OpenAI API key is valid and returns a boolean value."""
+        if not self.api_key:
+            st.error("Please enter your OpenAI API key.")
+            return False
+        return True
+class AnalyzeFilesForm:
+    """A class to encapsulate the analyze files form and its operations."""
+    def __init__(self, session_state):
+        # Session-state object the form reads code_files from and writes
+        # selected_files / analyze_files to.
+        self.session_state = session_state
+    def display_form(self):
+        """Displays the analyze files form and its elements."""
+        st.write("Select files to analyze:")
+        file_tree = repo.create_file_tree(self.session_state.code_files)
+        # Pass the previous selection back in as `checked`, and store the
+        # "checked" entry of the widget's result as the new selection.
+        self.session_state.selected_files = tree_select(
+            file_tree,
+            show_expand_all=True,
+            check_model="leaf",
+            checked=self.session_state.get("selected_files"),
+        )["checked"]
+        logging.info("Selected files: %s", self.session_state.selected_files)
+        # Sticky flag: once truthy it stays truthy on later reruns, because
+        # the stored value is OR-ed back in.
+        self.session_state.analyze_files = st.form_submit_button(
+            "Analyze Files"
+        ) or self.session_state.get("analyze_files")

main.py ADDED Viewed

	@@ -0,0 +1,6 @@

+# NOTE: Keeping this so we don't have to redeploy the Streamlit app
+# This file is only a thin entry point: running it calls app.app().
+from app import app
+if __name__ == "__main__":
+    app()

query.py ADDED Viewed

	@@ -0,0 +1,149 @@

+import logging
+from textwrap import dedent
+from typing import Iterable
+import openai
+import streamlit as st
+import tiktoken
+def analyze_code_files(code_files: list[str]) -> Iterable[dict[str, str]]:
+    """Analyze the selected code files and return recommendations."""
+    return (analyze_code_file(code_file) for code_file in code_files)
+def analyze_code_file(code_file: str) -> dict[str, str]:
+    """Analyze a code file and return a dictionary with file information and recommendations."""
+    with open(code_file, "r") as f:
+        code_content = f.read()
+    if not code_content:
+        return {
+            "code_file": code_file,
+            "code_snippet": code_content,
+            "recommendation": "No code found in file",
+        }
+    try:
+        logging.info("Analyzing code file: %s", code_file)
+        analysis = get_code_analysis(code_content)
+    except Exception as e:
+        logging.info("Error analyzing code file: %s", code_file)
+        analysis = f"Error analyzing code file: {e}"
+    return {
+        "code_file": code_file,
+        "code_snippet": code_content,
+        "recommendation": analysis,
+    }
+def get_num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301"):
+    """Returns the number of tokens used by a list of messages.
+    Args:
+        messages: Iterable of message dicts; every value of every dict is
+            passed to the tokenizer, so values are expected to be strings.
+        model: Model name. "gpt-3.5-turbo" and "gpt-4" are counted as
+            "gpt-3.5-turbo-0301" and "gpt-4-0314" respectively.
+    Raises:
+        NotImplementedError: For any other model name.
+    """
+    # Source: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
+    try:
+        encoding = tiktoken.encoding_for_model(model)
+    except KeyError:
+        logging.debug("Model not found. Using cl100k_base encoding.")
+        encoding = tiktoken.get_encoding("cl100k_base")
+    # Unversioned names are resolved by recursing with a pinned snapshot name.
+    if model == "gpt-3.5-turbo":
+        logging.debug(
+            "gpt-3.5-turbo may change over time. Returning num tokens assuming gpt-3.5-turbo-0301."
+        )
+        return get_num_tokens_from_messages(
+            messages, model="gpt-3.5-turbo-0301"
+        )
+    elif model == "gpt-4":
+        logging.debug(
+            "gpt-4 may change over time. Returning num tokens assuming gpt-4-0314."
+        )
+        return get_num_tokens_from_messages(messages, model="gpt-4-0314")
+    elif model == "gpt-3.5-turbo-0301":
+        tokens_per_message = (
+            4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
+        )
+        tokens_per_name = -1  # if there's a name, the role is omitted
+    elif model == "gpt-4-0314":
+        tokens_per_message = 3
+        tokens_per_name = 1
+    else:
+        raise NotImplementedError(
+            f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
+        )
+    num_tokens = 0
+    for message in messages:
+        # Fixed per-message overhead, then the encoded length of each value.
+        num_tokens += tokens_per_message
+        for key, value in message.items():
+            num_tokens += len(encoding.encode(value))
+            # A "name" entry adjusts the count by the model-specific amount.
+            if key == "name":
+                num_tokens += tokens_per_name
+    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
+    return num_tokens
+@st.cache_data(show_spinner=False)
+def get_code_analysis(code: str) -> str:
+    """Get code analysis from the OpenAI API."""
+    prompt = dedent(
+        f"""\
+Please review the code below and identify any syntax or logical errors, suggest
+ways to refactor and improve code quality, enhance performance, address security
+concerns, and align with best practices. Provide specific examples for each area
+and limit your recommendations to three per category.
+Use the following response format, keeping the section headings as-is, and provide
+your feedback. Use bullet points for each response. The provided examples are for
+illustration purposes only and should not be repeated.
+**Syntax and logical errors (example)**:
+- Incorrect indentation on line 12
+- Missing closing parenthesis on line 23
+**Code refactoring and quality (example)**:
+- Replace multiple if-else statements with a switch case for readability
+- Extract repetitive code into separate functions
+**Performance optimization (example)**:
+- Use a more efficient sorting algorithm to reduce time complexity
+- Cache results of expensive operations for reuse
+**Security vulnerabilities (example)**:
+- Sanitize user input to prevent SQL injection attacks
+- Use prepared statements for database queries
+**Best practices (example)**:
+- Add meaningful comments and documentation to explain the code
+- Follow consistent naming conventions for variables and functions
+Code:
+```
+{code}
+```
+Your review:"""
+    )
+    messages = [{"role": "system", "content": prompt}]
+    tokens_in_messages = get_num_tokens_from_messages(
+        messages=messages, model="gpt-3.5-turbo"
+    )
+    max_tokens = 4096
+    tokens_for_response = max_tokens - tokens_in_messages
+    if tokens_for_response < 200:
+        return "The code file is too long to analyze. Please select a shorter file."
+    logging.info("Sending request to OpenAI API for code analysis")
+    logging.info("Max response tokens: %d", tokens_for_response)
+    response = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=messages,
+        max_tokens=tokens_for_response,
+        n=1,
+        temperature=0,
+    )
+    logging.info("Received response from OpenAI API")
+    # Get the assistant's response from the API response
+    assistant_response = response.choices[0].message["content"]
+    return assistant_response.strip()

repo.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import os
+from typing import Iterable
+import streamlit as st
+from git import Repo
+def list_code_files_in_repository(
+    repo_url: str, extensions: list[str]
+) -> Iterable[str]:
+    """Clone the GitHub repository and return a list of code files with the specified extensions."""
+    local_path = clone_github_repository(repo_url)
+    return get_all_files_in_directory(local_path, extensions)
+@st.cache_data(show_spinner=False)
+def clone_github_repository(repo_url: str) -> str:
+    """Clone a GitHub repository and return the local path."""
+    local_path = repo_url.split("/")[-1]
+    if not os.path.exists(local_path):
+        Repo.clone_from(repo_url, local_path)
+    return local_path
+def get_all_files_in_directory(path: str, extensions: list[str]) -> list[str]:
+    """Return a list of all files in a directory with the specified extension."""
+    files = []
+    for root, _, filenames in os.walk(path):
+        for filename in filenames:
+            if any(filename.endswith(ext) for ext in extensions):
+                files.append(os.path.join(root, filename))
+    return files
+def create_file_tree(code_files: Iterable[str]) -> list[dict[str, str]]:
+    file_tree = []
+    code_files = sorted(code_files)
+    for file in code_files:
+        parts = file.split(os.sep)
+        current_level = file_tree
+        for i, part in enumerate(parts):
+            existing = [
+                node for node in current_level if node["label"] == part
+            ]
+            if existing:
+                current_level = existing[0].setdefault("children", [])
+            else:
+                new_node = {
+                    "label": part,
+                    "value": os.sep.join(parts[: i + 1]),
+                }
+                current_level.append(new_node)
+                if i != len(parts) - 1:
+                    current_level = new_node.setdefault("children", [])
+    return file_tree

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+GitPython==3.1.43
+openai==1.26.0
+streamlit==1.34.0
+streamlit_tree_select==0.0.5
+tiktoken==0.6.0

utils.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import logging
+import os
+# Maps a lower-case file extension (including the leading dot) to a language
+# name. The keys also serve as the extension choices offered in the repo form.
+EXTENSION_TO_LANGUAGE_MAP = {
+    ".py": "python",
+    ".js": "javascript",
+    ".java": "java",
+    ".cpp": "cpp",
+    ".c": "c",
+    ".rb": "ruby",
+    ".php": "php",
+    ".cs": "csharp",
+    ".go": "go",
+    ".swift": "swift",
+    ".ts": "typescript",
+    ".rs": "rust",
+    ".kt": "kotlin",
+    ".m": "objective-c",
+}
+def load_environment_variables(file_path: str) -> None:
+    if os.path.exists(file_path):
+        with open(file_path) as f:
+            for line in f:
+                if line.strip():
+                    key, value = line.strip().split("=")
+                    os.environ[key] = value
+def set_environment_variables() -> None:
+    os.environ["TOKENIZERS_PARALLELISM"] = "false"
+def configure_logging(log_file: str, level: int = logging.INFO) -> None:
+    """Configure root logging via logging.basicConfig with the given file name and level."""
+    logging.basicConfig(filename=log_file, level=level)
+class TempDirContext:
+    def __init__(self, temp_dir: str) -> None:
+        self.cwd = os.getcwd()
+        self.temp_dir = temp_dir
+    def __enter__(self):
+        os.makedirs(self.temp_dir, exist_ok=True)
+        os.chdir(self.temp_dir)
+    def __exit__(self, exc_type, exc_value, traceback):
+        os.chdir(self.cwd)