# Repo_Analyzer / query.py
# Uploaded by irshadtech10 ("Upload 10 files", commit 85d744c, verified)
import logging
from textwrap import dedent
from typing import Iterable
import os
from dotenv import load_dotenv
from openai import OpenAI
import streamlit as st
import tiktoken
load_dotenv()
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
def analyze_code_files(code_files: list[str]) -> Iterable[dict[str, str]]:
"""Analyze the selected code files and return recommendations."""
return (analyze_code_file(code_file) for code_file in code_files)
def analyze_code_file(code_file: str) -> dict[str, str]:
"""Analyze a code file and return a dictionary with file information and recommendations."""
with open(code_file, "r") as f:
code_content = f.read()
if not code_content:
return {
"code_file": code_file,
"code_snippet": code_content,
"recommendation": "No code found in file",
}
try:
logging.info("Analyzing code file: %s", code_file)
analysis = get_code_analysis(code_content)
except Exception as e:
logging.info("Error analyzing code file: %s", code_file)
analysis = f"Error analyzing code file: {e}"
return {
"code_file": code_file,
"code_snippet": code_content,
"recommendation": analysis,
}
def get_num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301"):
"""Returns the number of tokens used by a list of messages."""
# Source: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
try:
encoding = tiktoken.encoding_for_model(model)
except KeyError:
logging.debug("Model not found. Using cl100k_base encoding.")
encoding = tiktoken.get_encoding("cl100k_base")
if model == "gpt-3.5-turbo":
logging.debug(
"gpt-3.5-turbo may change over time. Returning num tokens assuming gpt-3.5-turbo-0301."
)
return get_num_tokens_from_messages(
messages, model="gpt-3.5-turbo-0301"
)
elif model == "gpt-4":
logging.debug(
"gpt-4 may change over time. Returning num tokens assuming gpt-4-0314."
)
return get_num_tokens_from_messages(messages, model="gpt-4-0314")
elif model == "gpt-3.5-turbo-0301":
tokens_per_message = (
4 # every message follows <|start|>{role/name}\n{content}<|end|>\n
)
tokens_per_name = -1 # if there's a name, the role is omitted
elif model == "gpt-4-0314":
tokens_per_message = 3
tokens_per_name = 1
else:
raise NotImplementedError(
f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
)
num_tokens = 0
for message in messages:
num_tokens += tokens_per_message
for key, value in message.items():
num_tokens += len(encoding.encode(value))
if key == "name":
num_tokens += tokens_per_name
num_tokens += 3 # every reply is primed with <|start|>assistant<|message|>
return num_tokens
@st.cache_data(show_spinner=False)
def get_code_analysis(code: str) -> str:
"""Get code analysis from the OpenAI API."""
prompt = dedent(
f"""\
Please review the code below and identify any syntax or logical errors, suggest
ways to refactor and improve code quality, enhance performance, address security
concerns, and align with best practices. Provide specific examples for each area
and limit your recommendations to three per category.
Use the following response format, keeping the section headings as-is, and provide
your feedback. Use bullet points for each response. The provided examples are for
illustration purposes only and should not be repeated.
**Syntax and logical errors (example)**:
- Incorrect indentation on line 12
- Missing closing parenthesis on line 23
**Code refactoring and quality (example)**:
- Replace multiple if-else statements with a switch case for readability
- Extract repetitive code into separate functions
**Performance optimization (example)**:
- Use a more efficient sorting algorithm to reduce time complexity
- Cache results of expensive operations for reuse
**Security vulnerabilities (example)**:
- Sanitize user input to prevent SQL injection attacks
- Use prepared statements for database queries
**Best practices (example)**:
- Add meaningful comments and documentation to explain the code
- Follow consistent naming conventions for variables and functions
Code:
```
{code}
```
Your review:"""
)
messages = [{"role": "system", "content": prompt}]
tokens_in_messages = get_num_tokens_from_messages(
messages=messages, model="gpt-3.5-turbo"
)
max_tokens = 4096
tokens_for_response = max_tokens - tokens_in_messages
if tokens_for_response < 200:
return "The code file is too long to analyze. Please select a shorter file."
logging.info("Sending request to OpenAI API for code analysis")
logging.info("Max response tokens: %d", tokens_for_response)
response = client.chat.completions.create(
model="gpt-3.5-turbo",
messages=messages,
max_tokens=tokens_for_response,
n=1,
temperature=0,
)
logging.info("Received response from OpenAI API")
# Get the assistant's response from the API response
assistant_response = response.choices[0].message.content
return assistant_response.strip()