Spaces:

manoj1hcl
/

AgenticReviewerSFT

Sleeping

App Files Files Community

AgenticReviewerSFT / app.py

manoj1hcl

Update app.py

0a24236 verified 8 months ago

raw

history blame contribute delete

11.7 kB

	# app.py

	import json
	import re
	import io
	import os
	import openai
	import gradio as gr
	from google.oauth2 import service_account
	from googleapiclient.discovery import build
	from googleapiclient.errors import HttpError
	from googleapiclient.http import MediaIoBaseDownload

	# --- Step 1: Load Secrets and Initialize API Clients ---
	# This part runs once when the application starts on Hugging Face.
	print("Initializing API clients from secrets...")

	# Load the OpenAI API key from Hugging Face Secrets.
	# SECURITY: the key itself is deliberately never printed, so the credential
	# cannot leak through the application logs.
	openai_api_key = os.getenv('OPENAI_API_KEY')
	if not openai_api_key:
	    print("❌ ERROR: Secret 'OPENAI_API_KEY' not found.")
	    openai_client = None
	else:
	    openai_client = openai.OpenAI(api_key=openai_api_key)
	    print("✅ Successfully initialized OpenAI client.")

	# Load the Google service-account credentials from Hugging Face Secrets.
	# drive_service stays None whenever the secret is missing or unusable; the
	# download function checks for that before touching the API.
	google_creds_json = os.getenv('GOOGLE_CREDS_JSON')
	drive_service = None
	if not google_creds_json:
	    print("❌ ERROR: Secret 'GOOGLE_CREDS_JSON' not found.")
	else:
	    try:
	        # The secret is a JSON string, so parse it into a dictionary first.
	        creds_dict = json.loads(google_creds_json)
	        # Read-only Drive access is all that is needed to download notebooks.
	        scopes = ['https://www.googleapis.com/auth/drive.readonly']

	        # Create credentials from the service account info.
	        creds = service_account.Credentials.from_service_account_info(creds_dict, scopes=scopes)
	        drive_service = build('drive', 'v3', credentials=creds)
	        print("✅ Successfully authenticated and created Google Drive service client.")
	    except Exception as e:
	        # Start-up boundary: report the failure and keep the app running so
	        # the UI can still show a clear error to the user later.
	        print(f"❌ An error occurred during Google authentication: {e}")


	# --- Step 2: Define the core functions ---

	def get_private_notebook_from_drive(colab_link):
	    """Download a private ``.ipynb`` file from Google Drive and parse it.

	    Args:
	        colab_link: URL containing ``/drive/<file id>``. Anything after the
	            ID (extra path segments, a ``?query`` string, a ``#fragment`` or
	            whitespace) is ignored.

	    Returns:
	        The downloaded notebook, decoded as UTF-8 and parsed from JSON.

	    Raises:
	        gr.Error: If the Drive client is unavailable, the link contains no
	            file ID, the Drive API rejects the request, or the download or
	            JSON parsing fails.
	    """
	    if not drive_service:
	        raise gr.Error("Google Drive service is not available. Check server logs.")

	    print(f"\nAttempting to fetch notebook from: {colab_link}")
	    # Stop the ID at the first '/', '?', '#' or whitespace so that shared
	    # links such as ".../drive/<id>?usp=sharing#scrollTo=..." resolve to the
	    # bare file ID instead of an ID polluted with URL decorations.
	    id_match = re.search(r'/drive/([^/?#\s]+)', colab_link or "")
	    if id_match is None:
	        raise gr.Error("Invalid Google Drive URL format. Could not extract file ID.")
	    file_id = id_match.group(1)

	    try:
	        request = drive_service.files().get_media(fileId=file_id)
	        file_io = io.BytesIO()
	        downloader = MediaIoBaseDownload(file_io, request)
	        done = False
	        # next_chunk() returns (status, done); keep pulling until done.
	        while not done:
	            _, done = downloader.next_chunk()

	        notebook_content_str = file_io.getvalue().decode('utf-8')
	        print("✅ Notebook downloaded successfully.")
	        return json.loads(notebook_content_str)
	    except HttpError as error:
	        # These statuses often mean the service account lacks permission.
	        if error.resp.status in (403, 404):
	            raise gr.Error("Notebook not found or permission denied. Have you shared the notebook with the service account's email?") from error
	        raise gr.Error(f"An API error occurred while fetching the notebook: {error}") from error
	    except Exception as e:
	        raise gr.Error(f"An unexpected error occurred while downloading from Drive: {e}") from e

	def _classify_markdown_cell(cell_source):
	    """Return the ``parsed_content`` key a markdown cell belongs to, or None.

	    The heading (first line) decides first. Only when the heading names no
	    section does a keyword anywhere in the body decide, in the order prompt,
	    metadata, reasoning chain. A response is recognised by heading only.
	    """
	    body = cell_source.lower()
	    heading = body.split('\n')[0].strip()
	    rules = (
	        (('prompt',), 'prompt'),
	        (('metadata',), 'metadata'),
	        (('response',), 'response'),
	        (('thought', 'chain'), 'reasoning_chains'),
	    )
	    for keywords, section in rules:
	        if any(word in heading for word in keywords):
	            return section
	    for keywords, section in rules:
	        if section != 'response' and any(word in body for word in keywords):
	            return section
	    return None


	def parse_colab_notebook(notebook_json):
	    """Parse the notebook's JSON content into a dictionary of sections.

	    Only markdown cells are inspected. A cell is classified by its heading
	    first, so that a reasoning-chain or response cell which merely mentions
	    "the prompt" in its body is no longer stored as the prompt.

	    Args:
	        notebook_json: Parsed notebook dictionary with a ``cells`` list.

	    Returns:
	        None when ``notebook_json`` is empty or None. Otherwise a dict with
	        the keys ``prompt``, ``metadata``, ``response``, ``reasoning_chains``
	        and ``cpp_code``; a section that was not found holds "Not found.".
	        Reasoning-chain cells are joined with a ``---`` separator, and
	        ``cpp_code`` is the first cpp-fenced block of the response.
	    """
	    if not notebook_json:
	        return None
	    print("\nParsing notebook content...")
	    parsed_content = {
	        "prompt": "Not found.", "metadata": "Not found.", "response": "Not found.",
	        "reasoning_chains": "Not found.", "cpp_code": "Not found."
	    }
	    reasoning_chains_list = []
	    for cell in notebook_json.get('cells', []):
	        if cell.get('cell_type') != 'markdown':
	            continue
	        cell_source = "".join(cell.get('source', []))
	        section = _classify_markdown_cell(cell_source)
	        if section == 'reasoning_chains':
	            reasoning_chains_list.append(cell_source)
	        elif section is not None:
	            # A later cell of the same section replaces an earlier one.
	            parsed_content[section] = cell_source

	    if reasoning_chains_list:
	        parsed_content["reasoning_chains"] = "\n\n---\n\n".join(reasoning_chains_list)
	    if parsed_content["response"] != "Not found.":
	        match = re.search(r'```cpp(.*?)```', parsed_content["response"], re.DOTALL)
	        if match:
	            parsed_content["cpp_code"] = match.group(1).strip()
	    print("✅ Parsing complete.")
	    return parsed_content

	def _build_review_prompt(content):
	    """Render the review instructions around the parsed notebook sections.

	    Backslashes meant for the model are written as ``\\`` so the f-string
	    holds no invalid escape sequences; the rendered text is unchanged.
	    """
	    # NOTE(review): the wording below is kept verbatim. Its LaTeX rule names
	    # `\$...\$` both as the only allowed format and as a forbidden one, and
	    # the text has several typos ("De-Oxygen", "excute") - confirm the
	    # intended wording with the prompt's author before editing it.
	    return f"""
	You are an expert AI assistant that reviews programming assignments. Analyze the following content based on the strict criteria provided below.
	---
	### CONTENT TO REVIEW
	---
	1. Metadata :
	{content['metadata']}
	2. Prompt :
	{content['prompt']}
	3. Reasoning Chains:
	{content['reasoning_chains']}
	4. Final Response:
	{content['response']}
	5. Final C++ Code:
	```cpp
	{content['cpp_code']}
	```
	---
	### REVIEW CRITERIA
	---
	Please provide a detailed, point-by-point analysis covering the following three sections:
	1. Metadata Analysis:
	- LaTeX Formatting: Is all LaTeX enclosed only in `\\$...\\$`? No other formats like `$$...$$` or `\\$...\\$` or `$...$`are allowed.
	- Grammar: Is the response grammatically correct and well-written?
	- IT must have the
	Category: - Coding
	Topic: - Competitive Programming
	Subtopic: - [list of the topics that is related to the provided final c++ code]
	Difficulty: - Hard
	Languages: - C++
	Number of approaches: - (number, \\$ .. Complexity1 \\to complexity2 .... \\$) both must be present
	Number of Chain: total number of chains in the reasoing.
	2. Prompt Analysis:
	- LaTeX Formatting: Is all LaTeX enclosed only in `\\$...\\$`? No other formats like `$$...$$` or `\\$...\\$` or `$...$`are allowed.
	- Grammar: Is the response grammatically correct and well-written?
	- There must be atleast two explample of input output pair in the prompt
	3. Reasoning Chains Flow Analysis:
	- LaTeX Formatting: Is all LaTeX enclosed only in `\\$...\\$`? No other formats like `$$...$$` or `\\$...\\$` or `$...$`are allowed.
	- Relevance: Are the approaches relevant to the problem?
	- Progression: Does the reasoning develop from a simple/inefficient approach to the final, optimized one? Each subequent chain should be more optimized than previous
	- Critique of Prior Approaches: Does the author mention the limitations or disadvantages of the inefficient approaches?
	- Justification: Does the author justify why each new approach is better than the previous one?
	- Improvements: Does the final approach clearly state its improvements?
	- Predictive terms: It should not have any predictive commitment about efficiency or correctness of the chain before discussing the approach.
	4. Final Response Analysis:
	- LaTeX Formatting: Is all LaTeX enclosed only in `\\$...\\$`? No other formats like `$$...$$` or `\\$...\\$` or `$...$`are allowed.
	- Grammar: Is the response grammatically correct and well-written?
	- There must be correct problem understanding according to the prompt
	- There must be a approach explanation that should be correct for the prompt and should be exact same as proposed final efficient approach in the chains.
	- there must be correct code explanation that correctly matches with code.
	- there must be a complexity analysis that should be correct for the code and should match with the complexity of the proposed final efficient approach in the chains.
	- in the end must a conclusion part.
	5. C++ Coding Style Analysis:
	- Naming: Are variables and functions named explicitly and clearly?
	- Glodable variable : should have kCamelCase and function and other variable should be camelCase
	- Style: Is the style standard and natural, avoiding unnecessary inlines, typedefs, etc.?
	- Abbreviations: Are vague abbreviations avoided?
	- Const Usage: Is any unnatural `const` usage explained?
	- Comments: Are comments added where necessary and comment should be in De-Oxygen format only.
	- Please excute the C++ code in latest C++20 compiler and should not throw any errors.
	Provide a structured review with a clear verdict fail or pass with the reason for each section and each point.
	"""


	def review_notebook_with_llm(content):
	    """Send the parsed notebook content to the OpenAI API for a detailed review.

	    Args:
	        content: Mapping with the keys ``metadata``, ``prompt``,
	            ``reasoning_chains``, ``response`` and ``cpp_code``.

	    Returns:
	        The review text produced by the model, stripped of surrounding
	        whitespace.

	    Raises:
	        gr.Error: If the OpenAI client is not initialized, the API call
	            fails, or the model returns no text.
	    """
	    if not openai_client:
	        raise gr.Error("OpenAI client is not initialized. Check server logs.")

	    print("\nSending content to AI for review...")
	    prompt_for_llm = _build_review_prompt(content)
	    try:
	        response = openai_client.chat.completions.create(
	            model="gpt-4o",
	            messages=[
	                {"role": "system", "content": "You are an expert AI assistant that reviews programming assignments based on a strict set of guidelines. Provide a detailed, point-by-point analysis."},
	                {"role": "user", "content": prompt_for_llm}
	            ]
	        )
	    except openai.APIError as e:
	        raise gr.Error(f"An OpenAI API error occurred: {e}") from e
	    except Exception as e:
	        raise gr.Error(f"An unexpected error occurred: {e}") from e

	    # The message content can be None and the choices list can be empty;
	    # report that clearly instead of failing on .strip() or indexing.
	    review_text = response.choices[0].message.content if response.choices else None
	    if not review_text:
	        raise gr.Error("The AI returned an empty review. Please try again.")
	    print("✅ AI review received successfully.")
	    return review_text.strip()

	def review_colab_notebook_ui(colab_link, progress=gr.Progress()):
	    """Run the whole review pipeline for the UI: fetch, parse, then review.

	    Args:
	        colab_link: Link typed into the UI; surrounding whitespace is
	            trimmed before use.
	        progress: Gradio progress tracker, called with a fraction and a
	            description at each stage. The ``gr.Progress()`` default is how
	            Gradio expects the tracker to be declared.

	    Returns:
	        The review text returned by ``review_notebook_with_llm``.

	    Raises:
	        gr.Error: If the link is empty, parsing yields nothing, or any
	            stage raises one.
	    """
	    # Trim once and pass the trimmed value on, so that a link pasted with
	    # stray spaces or a newline does not end up inside the Drive file ID.
	    cleaned_link = colab_link.strip() if colab_link else ""
	    if not cleaned_link:
	        raise gr.Error("Please enter a Google Colab link.")

	    progress(0, desc="Starting Review...")

	    progress(0.1, desc="Fetching Notebook from Google Drive...")
	    notebook_data = get_private_notebook_from_drive(cleaned_link)

	    progress(0.3, desc="Parsing Notebook Content...")
	    parsed_content = parse_colab_notebook(notebook_data)
	    if not parsed_content:
	        raise gr.Error("Failed to parse the notebook.")

	    progress(0.5, desc="Sending Content to AI for Review (this may take a moment)...")
	    ai_review = review_notebook_with_llm(parsed_content)

	    progress(1, desc="Review Complete!")
	    return ai_review

	# --- Step 3: Build and launch the Gradio Interface ---
	# Layout: title and description, one row holding the link box and the
	# button, the review output, then clickable example links.
	with gr.Blocks(theme=gr.themes.Soft()) as iface:
	    gr.Markdown("# AI-Powered Colab Notebook Reviewer")
	    gr.Markdown("Enter the Google Drive link to a Colab notebook to get a detailed review based on a predefined set of criteria.")

	    with gr.Row():
	        # The textbox is given four times the row share of the button.
	        colab_link_input = gr.Textbox(
	            scale=4,
	            label="Google Colab Link",
	            placeholder="e.g., https://colab.research.google.com/drive/1JDtCbau26lE4mNvWzEDd23WfGLB6xtUq",
	        )
	        review_button = gr.Button("Review Notebook", scale=1, variant="primary")

	    output_markdown = gr.Markdown("Your review will appear here...")

	    # Clicking the button runs the full pipeline and renders its markdown.
	    review_button.click(review_colab_notebook_ui, colab_link_input, output_markdown)

	    # Example links that fill the textbox when clicked.
	    gr.Examples(
	        [
	            "https://colab.research.google.com/drive/1JDtCbau26lE4mNvWzEDd23WfGLB6xtUq",
	            "https://colab.research.google.com/drive/1hmLd0pSG6t-mLIAv1cXNPXPbJzbcRjg7",
	        ],
	        colab_link_input,
	    )

	# Launch the UI.
	# NOTE(review): share=True asks Gradio for a public share link; this is
	# presumably unnecessary when hosted on Hugging Face Spaces - confirm and
	# consider dropping it.
	iface.launch(share=True)