Spaces:

NiviruIns
/

ai-commit-server

Sleeping

App Files Community

NiviruIns committed on Feb 4

Commit

eb3184c

verified ·

1 Parent(s): cbccf44

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -16

app.py CHANGED Viewed

@@ -1,13 +1,12 @@
 import os
 from flask import Flask, request, jsonify
-from transformers import RobertaTokenizer, T5ForConditionalGeneration
 import torch
 app = Flask(__name__)
-# --- SWITCH TO THE OFFICIAL SALESFORCE MODEL ---
-# This is the industry standard for code summarization.
-MODEL_NAME = "Salesforce/codet5-base"
 print(f"--- AI Commit Generator Server ---")
 print(f"Downloading/Loading Model: {MODEL_NAME}")
@@ -15,8 +14,9 @@ print(f"Downloading/Loading Model: {MODEL_NAME}")
 device = "cpu"
 try:
-    tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME)
-    model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME).to(device)
     print("✅ Model loaded successfully!")
 except Exception as e:
     print(f"❌ Error loading model: {e}")
@@ -26,14 +26,12 @@ def generate_summary(diff_text):
     if not diff_text or len(diff_text.strip()) < 5:
         return "Update file"
-    # CodeT5 works best when we tell it what to do
-    input_text = "Summarize: " + diff_text
-    input_ids = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True).to(device)
     outputs = model.generate(
         input_ids,
-        max_length=50,       # Keep it short
         num_beams=5,
         early_stopping=True
     )
@@ -53,12 +51,11 @@ def generate_commit():
         name = file_obj.get('name', 'Unknown File')
         diff = file_obj.get('diff', '')
-        # --- DEBUG LOGGING ---
-        # This will show up in the Hugging Face Logs so we know what the AI saw
-        print(f"[{name}] Analyzing diff length: {len(diff)}")
-        if len(diff) > 6000:
-            final_message_parts.append(f"{name}\nLarge changes detected")
             continue
         try:

 import os
 from flask import Flask, request, jsonify
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import torch
 app = Flask(__name__)
+# --- SWITCH TO THE EXPERT MODEL ---
+MODEL_NAME = "SEBIS/code_trans_t5_base_commit_generation"
 print(f"--- AI Commit Generator Server ---")
 print(f"Downloading/Loading Model: {MODEL_NAME}")
 device = "cpu"
 try:
+    # AutoTokenizer handles the specific needs of this model automatically
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, skip_special_tokens=True)
+    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(device)
     print("✅ Model loaded successfully!")
 except Exception as e:
     print(f"❌ Error loading model: {e}")
     if not diff_text or len(diff_text.strip()) < 5:
         return "Update file"
+    # The Expert model just needs the raw diff. No "Summarize:" prefix needed.
+    input_ids = tokenizer.encode(diff_text, return_tensors="pt", max_length=512, truncation=True).to(device)
     outputs = model.generate(
         input_ids,
+        max_length=80,
         num_beams=5,
         early_stopping=True
     )
         name = file_obj.get('name', 'Unknown File')
         diff = file_obj.get('diff', '')
+        print(f"[{name}] Length: {len(diff)}")
+        # Increased limit to 12,000 characters to handle larger updates
+        if len(diff) > 12000:
+            final_message_parts.append(f"{name}\nLarge changes detected (please commit in smaller chunks)")
             continue
         try: