# app.py – Final Updated Version with Unified Visualization (Model selection-safe + Visualization Fixes)
import gradio as gr
import os
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import zipfile
import json
from datetime import datetime
from dotenv import load_dotenv
from response_generator import generate_all_responses_with_reasoning
from round_robin_evaluator import comprehensive_round_robin_evaluation
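# As used below, generate_all_responses_with_reasoning is expected to return
# {model: {"response", "reasoning", "search_results", "is_ats", ...}} for the selected
# models, and comprehensive_round_robin_evaluation to return
# {model: {"evaluations": {evaluator: scores}, "average_scores": {...}}}.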
load_dotenv()
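# The kaleido default format only affects static image export (fig.write_image);
# the charts below are written as HTML, so this is kept only as a safeguard.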
pio.kaleido.scope.default_format = "png"
metrics = ['helpfulness', 'correctness', 'coherence', 'tone_score',
           'accuracy', 'relevance', 'completeness', 'clarity']
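# Metric scores are assumed to be normalized to the 0-1 range: missing scores default
# to 0.5 below, and the radar chart's radial axis uses range=[0, 1].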
def extract_text_from_resume(file):
    """Extract plain text from an uploaded PDF, DOCX, or TXT resume."""
    ext = os.path.splitext(file.name)[1].lower()
    if ext == ".pdf":
        import fitz  # PyMuPDF
        with fitz.open(file.name) as doc:
            return "\n".join(page.get_text() for page in doc)
    elif ext == ".docx":
        import docx  # python-docx
        doc = docx.Document(file.name)
        return "\n".join(p.text for p in doc.paragraphs)
    elif ext == ".txt":
        # Read from the file path for consistency with the PDF/DOCX branches.
        with open(file.name, "r", encoding="utf-8", errors="ignore") as f:
            return f.read()
    return ""
def ats_score_advanced(response, resume, jd):
    """Ask GPT-4 to score a generated response against the resume and job description."""
    prompt = f"""
You are a professional ATS scoring engine. Compare the generated response to the candidate's resume and job description using:
1. Keyword Matching
2. Section Weighting
3. Semantic Similarity
4. Recency/Frequency
5. Penalty Detection
6. Aggregation
Resume:
{resume}
Job Description:
{jd}
Response:
{response}
Return JSON:
{{"ats_score": <0-100>, "strengths": ["..."], "gaps": ["..."], "suggestions": ["..."]}}
"""
    from openai import OpenAI
    openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    try:
        res = openai_client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        return json.loads(res.choices[0].message.content.strip())
    except Exception:
        # Fall back to a neutral score if the API call fails or the reply is not valid JSON.
        return {"ats_score": 50, "strengths": [], "gaps": [], "suggestions": ["Check formatting."]}
def create_visualizations(df, results_dir):
    html_files = []
    summary = df.groupby('target_model')[metrics].mean().reset_index()
    font_style = dict(family="Arial, sans-serif", size=12, color="black")
    # 1. Heatmap with professional styling
    heatmap = px.imshow(
        summary[metrics].values,
        x=metrics,
        y=summary['target_model'],
        labels=dict(x="Metric", y="Model", color="Score"),
        title="Heatmap: Metrics Across Models",
        color_continuous_scale='Viridis'
    )
    heatmap.update_layout(
        margin=dict(l=80, r=40, t=80, b=120),
        xaxis_tickangle=-45,
        title_font=dict(size=18, family="Arial, sans-serif"),
        font=font_style
    )
    heatmap_path = os.path.join(results_dir, "heatmap.html")
    heatmap.write_html(heatmap_path)
    html_files.append(heatmap_path)
    # 2. Radar Chart with professional styling
    radar = go.Figure()
    for _, row in summary.iterrows():
        radar.add_trace(go.Scatterpolar(
            r=list(row[metrics]),
            theta=metrics,
            fill='toself',
            name=row['target_model']
        ))
    radar.update_layout(
        title="Radar Chart: Model Score Profiles",
        polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
        legend_title_text='Models',
        title_font=dict(size=18, family="Arial, sans-serif"),
        font=font_style,
        margin=dict(l=60, r=60, t=80, b=80)
    )
    radar_path = os.path.join(results_dir, "radar.html")
    radar.write_html(radar_path)
    html_files.append(radar_path)
    # 3. Bar Chart with professional styling
    bar = px.bar(
        summary.melt(id_vars='target_model'),
        x='variable',
        y='value',
        color='target_model',
        barmode='group',
        title="Bar Chart: Metric Comparison",
        labels={'variable': 'Metric', 'value': 'Score', 'target_model': 'Model'}
    )
    bar.update_layout(
        margin=dict(l=60, r=20, t=80, b=120),
        xaxis_tickangle=-45,
        legend_title_text='Model',
        title_font=dict(size=18, family="Arial, sans-serif"),
        font=font_style
    )
    bar_path = os.path.join(results_dir, "barchart.html")
    bar.write_html(bar_path)
    html_files.append(bar_path)
    return (heatmap, radar, bar), html_files
def format_ats_feedback(score, strengths, gaps, suggestions):
    color = "🟢" if score >= 75 else "🟡" if score >= 50 else "🔴"
    return f"""
### ATS Match Score: ~{score}% {color}
#### **Strengths / High Matches:**
{chr(10).join([f"* {s}" for s in strengths]) if strengths else "* None found."}
#### **Partial or Missing:**
{chr(10).join([f"* {g}" for g in gaps]) if gaps else "* None mentioned."}
#### **How to Improve ATS Score:**
{chr(10).join([f"1. {s}" for s in suggestions]) if suggestions else "1. Add missing skills."}
"""
def process_prompt(prompt, enable_realtime, enable_eval, enable_analysis, user_file, model_selection):
    # enable_realtime and enable_analysis are accepted for UI parity but not used here yet.
    # gr.CheckboxGroup already passes the list of selected choice values, so keep them
    # in canonical order instead of zipping against booleans.
    selected_models = [m for m in ["GPT-4", "Claude 3", "Gemini 1.5"] if m in (model_selection or [])]
    resume_text = ""
    job_description = prompt
    batch_mode = user_file and hasattr(user_file, 'name') and user_file.name.endswith(".csv")
    resume_mode = user_file and hasattr(user_file, 'name') and user_file.name.lower().endswith(('.pdf', '.docx', '.txt'))
    prompts = [prompt]
    ats_summary_texts = []
    search_results = ""
    if batch_mode:
        df_batch = pd.read_csv(user_file.name)
        prompts = df_batch['prompt'].dropna().tolist()
    elif resume_mode:
        resume_text = extract_text_from_resume(user_file)
    all_rows = []
    all_charts = [None, None, None]  # placeholders so the output tuple always carries three plots
    zip_path, ats_table_markdown = None, ""
    responses = {}
    for prompt_text in prompts:
        responses = generate_all_responses_with_reasoning(
            prompt_text,
            selected_models,
            resume_text if resume_mode else None,
            job_description if resume_mode else None
        )
        if responses:
            first_response = list(responses.values())[0]
            search_results = first_response.get('search_results', '')
            is_ats = first_response.get('is_ats', False)
            ats_rows = []
            for model in responses:
                model_resp = responses[model]['response']
                model_reasoning = responses[model]['reasoning']
                responses[model]['ats_embed'] = f"### Response\n\n{model_resp}\n\n---\n\n**Explainability:**\n{model_reasoning}"
                if resume_mode and is_ats:
                    try:
                        ats_result = ats_score_advanced(model_resp, resume_text, prompt_text)
                        ats_rows.append(f"| {model} | {ats_result['ats_score']} | {', '.join(ats_result.get('strengths', []))} | {', '.join(ats_result.get('suggestions', []))} |")
                    except Exception:
                        ats_rows.append(f"| {model} | N/A | N/A | N/A |")
            if ats_rows:
                ats_table_markdown = "| Model | Score | Strengths | Suggestions |\n|-------|-------|-----------|-------------|\n" + "\n".join(ats_rows)
            # Always run evaluation to generate chart data
            compact = {k: v['response'] for k, v in responses.items()}
            eval_result = comprehensive_round_robin_evaluation(compact, prompt_text)
            for model, data in eval_result.items():
                for evaluator, scores in data['evaluations'].items():
                    row = {
                        'prompt': prompt_text,
                        'target_model': model,
                        'evaluator': evaluator,
                        'response': responses[model]['response'],
                        'explainability': responses[model]['reasoning']
                    }
                    row.update({k: scores.get(k, 0.5) for k in metrics})
                    row.update({f"avg_{k}": data['average_scores'].get(k, 0.5) for k in metrics})
                    all_rows.append(row)
        # Record one ATS summary per prompt so the batch CSV column lines up row-for-row.
        ats_summary_texts.append(ats_table_markdown or "")
    df_all = pd.DataFrame(all_rows)
    if not df_all.empty:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        results_dir = f"results/batch_{timestamp}"
        os.makedirs(results_dir, exist_ok=True)
        csv_path = os.path.join(results_dir, "evaluation.csv")
        df_all.to_csv(csv_path, index=False)
        (heatmap, radar, bar), chart_paths = create_visualizations(df_all, results_dir)
        all_charts = [heatmap, radar, bar]
        zip_path = os.path.join(results_dir, "bundle.zip")
        with zipfile.ZipFile(zip_path, 'w') as zipf:
            zipf.write(csv_path, arcname="evaluation.csv")
            for chart in chart_paths:
                zipf.write(chart, arcname=os.path.basename(chart))
            if batch_mode:
                df_batch['ATS Summary'] = ats_summary_texts
                df_batch.to_csv(os.path.join(results_dir, "batch_prompts_output.csv"), index=False)
                zipf.write(os.path.join(results_dir, "batch_prompts_output.csv"), arcname="batch_prompts_output.csv")
    # Conditional UI updates
    eval_table = df_all[['target_model', 'evaluator'] + metrics] if not df_all.empty and enable_eval else pd.DataFrame()
    ats_md = ats_table_markdown if resume_mode else ""
    # Models that were not selected have no response; fall back to a placeholder instead of raising KeyError.
    model_outputs = tuple(
        responses.get(model, {}).get('ats_embed', responses.get(model, {}).get('response', "Model not selected."))
        for model in ["GPT-4", "Claude 3", "Gemini 1.5"]
    )
    return model_outputs + (
        search_results or "N/A",
        *all_charts,
        eval_table,
        ats_md,
        zip_path
    )
def download_results(path):
    return path if path and os.path.exists(path) else None
def create_interface():
    with gr.Blocks(title="LLM Comparison Hub") as demo:
        gr.Markdown("""
# LLM Comparison Hub
This app compares LLM responses using round-robin evaluations, with real-time query detection and comprehensive analysis.

**How to use:**
- Enter a prompt (JD or query)
- Upload a resume (PDF/DOCX/TXT) or a CSV with prompts
- Select models
- Click **Run Evaluation**

**Features:**
- Real-time web search fallback
- Resume vs JD ATS scoring (optional)
- Batch CSV prompt evaluation
- Visualizations (Heatmap, Radar, Bar)
- ZIP export of all results
""")
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="Enter Prompt", lines=4)
                user_file = gr.File(label="Upload Resume or CSV", file_types=[".pdf", ".docx", ".txt", ".csv"])
                model_selector = gr.CheckboxGroup(label="Select Models", choices=["GPT-4", "Claude 3", "Gemini 1.5"], value=["GPT-4", "Claude 3", "Gemini 1.5"])
                enable_realtime = gr.Checkbox(label="Enable real-time detection", value=True)
                enable_eval = gr.Checkbox(label="Enable evaluation", value=True)
                enable_analysis = gr.Checkbox(label="Enable analysis (currently not used)", value=True)
                submit = gr.Button("Run Evaluation")
            with gr.Column():
                with gr.Tabs():
                    with gr.Tab("GPT-4"): gpt_out = gr.Markdown()
                    with gr.Tab("Claude 3"): claude_out = gr.Markdown()
                    with gr.Tab("Gemini 1.5"): gemini_out = gr.Markdown()
                    with gr.Tab("Evaluation Table"): df_out = gr.Dataframe()
                    with gr.Tab("ATS Evaluation"): ats_summary = gr.Markdown()
                    with gr.Tab("Search Results"): search_out = gr.Markdown()
                    with gr.Tab("Visualizations"):
                        heatmap_plot = gr.Plot()
                        radar_plot = gr.Plot()
                        bar_plot = gr.Plot()
                export_btn = gr.Button("Download ZIP Bundle")
                zip_output = gr.File(file_types=[".zip"], interactive=False, visible=True)
        submit.click(
            fn=process_prompt,
            inputs=[prompt, enable_realtime, enable_eval, enable_analysis, user_file, model_selector],
            outputs=[gpt_out, claude_out, gemini_out, search_out, heatmap_plot, radar_plot, bar_plot, df_out, ats_summary, zip_output]
        )
        export_btn.click(download_results, inputs=[zip_output], outputs=[zip_output])
    return demo
if __name__ == "__main__":
app = create_interface()
app.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)