NiviruIns committed on
Commit
cbccf44
·
verified ·
1 Parent(s): 6003ad9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -12
app.py CHANGED
@@ -1,23 +1,22 @@
1
  import os
2
  from flask import Flask, request, jsonify
3
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
  import torch
5
 
6
  app = Flask(__name__)
7
 
8
- # --- UPDATED MODEL ---
9
- # This model is specifically trained for git commit generation and is active.
10
- MODEL_NAME = "SEBIS/code_trans_t5_base_commit_generation"
11
 
12
  print(f"--- AI Commit Generator Server ---")
13
  print(f"Downloading/Loading Model: {MODEL_NAME}")
14
 
15
- device = "cpu"
16
 
17
  try:
18
- # Use AutoTokenizer and AutoModelForSeq2SeqLM for better compatibility
19
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, skip_special_tokens=True)
20
- model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(device)
21
  print("✅ Model loaded successfully!")
22
  except Exception as e:
23
  print(f"❌ Error loading model: {e}")
@@ -27,12 +26,14 @@ def generate_summary(diff_text):
27
  if not diff_text or len(diff_text.strip()) < 5:
28
  return "Update file"
29
 
30
- # This model works best with raw code, but we tokenize it first
31
- input_ids = tokenizer.encode(diff_text, return_tensors="pt", max_length=512, truncation=True).to(device)
 
 
32
 
33
  outputs = model.generate(
34
  input_ids,
35
- max_length=80,
36
  num_beams=5,
37
  early_stopping=True
38
  )
@@ -52,7 +53,10 @@ def generate_commit():
52
  name = file_obj.get('name', 'Unknown File')
53
  diff = file_obj.get('diff', '')
54
 
55
- # Skip huge files to prevent crashing CPU
 
 
 
56
  if len(diff) > 6000:
57
  final_message_parts.append(f"{name}\nLarge changes detected")
58
  continue
 
1
  import os
2
  from flask import Flask, request, jsonify
3
+ from transformers import RobertaTokenizer, T5ForConditionalGeneration
4
  import torch
5
 
6
  app = Flask(__name__)
7
 
8
+ # --- SWITCH TO THE OFFICIAL SALESFORCE MODEL ---
9
+ # This is the industry standard for code summarization.
10
+ MODEL_NAME = "Salesforce/codet5-base"
11
 
12
  print(f"--- AI Commit Generator Server ---")
13
  print(f"Downloading/Loading Model: {MODEL_NAME}")
14
 
15
+ device = "cpu"
16
 
17
  try:
18
+ tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME)
19
+ model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME).to(device)
 
20
  print("✅ Model loaded successfully!")
21
  except Exception as e:
22
  print(f"❌ Error loading model: {e}")
 
26
  if not diff_text or len(diff_text.strip()) < 5:
27
  return "Update file"
28
 
29
+ # CodeT5 works best when we tell it what to do
30
+ input_text = "Summarize: " + diff_text
31
+
32
+ input_ids = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True).to(device)
33
 
34
  outputs = model.generate(
35
  input_ids,
36
+ max_length=50, # Keep it short
37
  num_beams=5,
38
  early_stopping=True
39
  )
 
53
  name = file_obj.get('name', 'Unknown File')
54
  diff = file_obj.get('diff', '')
55
 
56
+ # --- DEBUG LOGGING ---
57
+ # This will show up in the Hugging Face Logs so we know what the AI saw
58
+ print(f"[{name}] Analyzing diff length: {len(diff)}")
59
+
60
  if len(diff) > 6000:
61
  final_message_parts.append(f"{name}\nLarge changes detected")
62
  continue