NiviruIns committed on
Commit
eb3184c
·
verified ·
1 Parent(s): cbccf44

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -16
app.py CHANGED
@@ -1,13 +1,12 @@
1
  import os
2
  from flask import Flask, request, jsonify
3
- from transformers import RobertaTokenizer, T5ForConditionalGeneration
4
  import torch
5
 
6
  app = Flask(__name__)
7
 
8
- # --- SWITCH TO THE OFFICIAL SALESFORCE MODEL ---
9
- # This is the industry standard for code summarization.
10
- MODEL_NAME = "Salesforce/codet5-base"
11
 
12
  print(f"--- AI Commit Generator Server ---")
13
  print(f"Downloading/Loading Model: {MODEL_NAME}")
@@ -15,8 +14,9 @@ print(f"Downloading/Loading Model: {MODEL_NAME}")
15
  device = "cpu"
16
 
17
  try:
18
- tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME)
19
- model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME).to(device)
 
20
  print("✅ Model loaded successfully!")
21
  except Exception as e:
22
  print(f"❌ Error loading model: {e}")
@@ -26,14 +26,12 @@ def generate_summary(diff_text):
26
  if not diff_text or len(diff_text.strip()) < 5:
27
  return "Update file"
28
 
29
- # CodeT5 works best when we tell it what to do
30
- input_text = "Summarize: " + diff_text
31
-
32
- input_ids = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True).to(device)
33
 
34
  outputs = model.generate(
35
  input_ids,
36
- max_length=50, # Keep it short
37
  num_beams=5,
38
  early_stopping=True
39
  )
@@ -53,12 +51,11 @@ def generate_commit():
53
  name = file_obj.get('name', 'Unknown File')
54
  diff = file_obj.get('diff', '')
55
 
56
- # --- DEBUG LOGGING ---
57
- # This will show up in the Hugging Face Logs so we know what the AI saw
58
- print(f"[{name}] Analyzing diff length: {len(diff)}")
59
 
60
- if len(diff) > 6000:
61
- final_message_parts.append(f"{name}\nLarge changes detected")
 
62
  continue
63
 
64
  try:
 
1
  import os
2
  from flask import Flask, request, jsonify
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
  import torch
5
 
6
  app = Flask(__name__)
7
 
8
+ # --- SWITCH TO THE EXPERT MODEL ---
9
+ MODEL_NAME = "SEBIS/code_trans_t5_base_commit_generation"
 
10
 
11
  print(f"--- AI Commit Generator Server ---")
12
  print(f"Downloading/Loading Model: {MODEL_NAME}")
 
14
  device = "cpu"
15
 
16
  try:
17
+ # AutoTokenizer handles the specific needs of this model automatically
18
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, skip_special_tokens=True)
19
+ model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(device)
20
  print("✅ Model loaded successfully!")
21
  except Exception as e:
22
  print(f"❌ Error loading model: {e}")
 
26
  if not diff_text or len(diff_text.strip()) < 5:
27
  return "Update file"
28
 
29
+ # The Expert model just needs the raw diff. No "Summarize:" prefix needed.
30
+ input_ids = tokenizer.encode(diff_text, return_tensors="pt", max_length=512, truncation=True).to(device)
 
 
31
 
32
  outputs = model.generate(
33
  input_ids,
34
+ max_length=80,
35
  num_beams=5,
36
  early_stopping=True
37
  )
 
51
  name = file_obj.get('name', 'Unknown File')
52
  diff = file_obj.get('diff', '')
53
 
54
+ print(f"[{name}] Length: {len(diff)}")
 
 
55
 
56
+ # Increased limit to 12,000 characters to handle larger updates
57
+ if len(diff) > 12000:
58
+ final_message_parts.append(f"{name}\nLarge changes detected (please commit in smaller chunks)")
59
  continue
60
 
61
  try: