from summarizers.file_summarizers import summarize_file_with_graph
from models.models import file_summarizer


def summarize_repo_with_graph(file_dict: dict, top_files=5, top_k_funcs=5):
    """
    Summarize a repository:
    - Summarize each of the first `top_files` files with summarize_file_with_graph
    - Join the per-file summaries into one document
    - Feed the combined text to the LED summarizer
    """
    file_summaries = []

    for file_path, code_text in list(file_dict.items())[:top_files]:
        try:
            summary = summarize_file_with_graph(code_text, top_k=top_k_funcs)
            file_summaries.append(summary)
        except Exception as e:
            print(f"Skipped file {file_path} due to: {e}")

    if not file_summaries:
        return "No valid summaries found."

    combined_input = "\n\n".join(file_summaries)

    final_summary = file_summarizer(
        combined_input,
        max_length=256,
        min_length=100,
        no_repeat_ngram_size=3,
        do_sample=False,
        truncation=True,  # guard against inputs longer than the model's window
    )[0]["summary_text"]

    return final_summary


# Alternative: scale the summary length to the input instead of using fixed caps.
# from transformers import AutoTokenizer
# # reuse the same tokenizer instance you already loaded
# tokenizer = file_summarizer.tokenizer

# def summarize_repo_with_graph(file_dict: dict, top_files=5, top_k_funcs=5):
#     file_summaries = []
#     for file_path, code_text in list(file_dict.items())[:top_files]:
#         try:
#             summary = summarize_file_with_graph(code_text, top_k=top_k_funcs)
#             file_summaries.append(summary)
#         except Exception as e:
#             print(f"Skipped file {file_path} due to: {e}")

#     if not file_summaries:
#         return "No valid summaries found."

#     combined_input = "\n\n".join(file_summaries)

#     # Dynamic length cap: target about half the input token count,
#     # clamped to [100, 256]; min_length stays at most 100 and
#     # at least 20 below max_length.
#     tokens = tokenizer.encode(combined_input, truncation=False)
#     suggested_max = min(256, max(100, len(tokens) // 2))
#     suggested_min = min(100, suggested_max - 20)

#     final_summary = file_summarizer(
#         combined_input,
#         max_length=suggested_max,
#         min_length=suggested_min,
#         no_repeat_ngram_size=3,
#         do_sample=False,
#     )[0]["summary_text"]

#     return final_summary
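# Worked example of the dynamic cap above: for a 120-token input,
# suggested_max = min(256, max(100, 60)) = 100 and
# suggested_min = min(100, 100 - 20) = 80; for a 2,000-token input,
# suggested_max = min(256, 1000) = 256 and suggested_min = min(100, 236) = 100.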
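# For reference: models.models isn't shown in this snippet. A minimal sketch of
# what it might contain, assuming file_summarizer is a Hugging Face
# summarization pipeline backed by an LED checkpoint (the model name below is
# an assumption, not taken from this codebase):
#
# from transformers import pipeline
#
# file_summarizer = pipeline(
#     "summarization",
#     model="allenai/led-base-16384",  # assumed checkpoint; LED handles long inputs
# )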
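
if __name__ == "__main__":
    # Usage sketch: build file_dict from a local checkout and summarize it.
    # The path below is hypothetical; only .py files are collected here.
    from pathlib import Path

    repo_root = Path("path/to/repo")  # hypothetical checkout location
    file_dict = {
        str(path): path.read_text(encoding="utf-8", errors="ignore")
        for path in repo_root.rglob("*.py")
    }

    print(summarize_repo_with_graph(file_dict, top_files=5, top_k_funcs=5))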