NiviruIns committed on
Commit
cbccf44
·
verified ·
1 Parent(s): 6003ad9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -12
app.py CHANGED
@@ -1,23 +1,22 @@
1
  import os
2
  from flask import Flask, request, jsonify
3
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
  import torch
5
 
6
  app = Flask(__name__)
7
 
8
- # --- UPDATED MODEL ---
9
- # This model is specifically trained for git commit generation and is active.
10
- MODEL_NAME = "SEBIS/code_trans_t5_base_commit_generation"
11
 
12
  print(f"--- AI Commit Generator Server ---")
13
  print(f"Downloading/Loading Model: {MODEL_NAME}")
14
 
15
- device = "cpu"
16
 
17
  try:
18
- # Use AutoTokenizer and AutoModelForSeq2SeqLM for better compatibility
19
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, skip_special_tokens=True)
20
- model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(device)
21
  print("✅ Model loaded successfully!")
22
  except Exception as e:
23
  print(f"❌ Error loading model: {e}")
@@ -27,12 +26,14 @@ def generate_summary(diff_text):
27
  if not diff_text or len(diff_text.strip()) < 5:
28
  return "Update file"
29
 
30
- # This model works best with raw code, but we tokenize it first
31
- input_ids = tokenizer.encode(diff_text, return_tensors="pt", max_length=512, truncation=True).to(device)
 
 
32
 
33
  outputs = model.generate(
34
  input_ids,
35
- max_length=80,
36
  num_beams=5,
37
  early_stopping=True
38
  )
@@ -52,7 +53,10 @@ def generate_commit():
52
  name = file_obj.get('name', 'Unknown File')
53
  diff = file_obj.get('diff', '')
54
 
55
- # Skip huge files to prevent crashing CPU
 
 
 
56
  if len(diff) > 6000:
57
  final_message_parts.append(f"{name}\nLarge changes detected")
58
  continue
 
1
  import os
2
  from flask import Flask, request, jsonify
3
+ from transformers import RobertaTokenizer, T5ForConditionalGeneration
4
  import torch
5
 
6
  app = Flask(__name__)
7
 
8
+ # --- SWITCH TO THE OFFICIAL SALESFORCE MODEL ---
9
+ # This is the industry standard for code summarization.
10
+ MODEL_NAME = "Salesforce/codet5-base"
11
 
12
  print(f"--- AI Commit Generator Server ---")
13
  print(f"Downloading/Loading Model: {MODEL_NAME}")
14
 
15
+ device = "cpu"
16
 
17
  try:
18
+ tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME)
19
+ model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME).to(device)
 
20
  print("✅ Model loaded successfully!")
21
  except Exception as e:
22
  print(f"❌ Error loading model: {e}")
 
26
  if not diff_text or len(diff_text.strip()) < 5:
27
  return "Update file"
28
 
29
+ # CodeT5 works best when we tell it what to do
30
+ input_text = "Summarize: " + diff_text
31
+
32
+ input_ids = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True).to(device)
33
 
34
  outputs = model.generate(
35
  input_ids,
36
+ max_length=50, # Keep it short
37
  num_beams=5,
38
  early_stopping=True
39
  )
 
53
  name = file_obj.get('name', 'Unknown File')
54
  diff = file_obj.get('diff', '')
55
 
56
+ # --- DEBUG LOGGING ---
57
+ # This will show up in the Hugging Face Logs so we know what the AI saw
58
+ print(f"[{name}] Analyzing diff length: {len(diff)}")
59
+
60
  if len(diff) > 6000:
61
  final_message_parts.append(f"{name}\nLarge changes detected")
62
  continue