aarushi-211's picture
Added modules
92c4d54
from summarizers.file_summarizers import summarize_file_with_graph
from models.models import file_summarizer
def summarize_repo_with_graph(file_dict: dict, top_files=5, top_k_funcs=5):
"""
Summarize a repository:
- Summarize each file using summarize_file_with_graph
- Combine top-k summaries
- Feed to LED summarizer
"""
file_summaries = []
for file_path, code_text in list(file_dict.items())[:top_files]:
try:
summary = summarize_file_with_graph(code_text, top_k=top_k_funcs)
file_summaries.append(summary)
except Exception as e:
print(f"Skipped file {file_path} due to: {e}")
if not file_summaries:
return "No valid summaries found."
combined_input = "\n\n".join(file_summaries)
final_summary = file_summarizer(
combined_input,
max_length=256,
min_length=100,
no_repeat_ngram_size=3,
do_sample=False,
)[0]["summary_text"]
return final_summary
# from transformers import AutoTokenizer
# # reuse the same tokenizer instance you already loaded
# tokenizer = file_summarizer.tokenizer
# def summarize_repo_with_graph(file_dict: dict, top_files=5, top_k_funcs=5):
# file_summaries = []
# for file_path, code_text in list(file_dict.items())[:top_files]:
# try:
# summary = summarize_file_with_graph(code_text, top_k=top_k_funcs)
# file_summaries.append(summary)
# except Exception as e:
# print(f"Skipped file {file_path} due to: {e}")
# if not file_summaries:
# return "No valid summaries found."
# combined_input = "\n\n".join(file_summaries)
# # dynamic length cap
# tokens = tokenizer.encode(combined_input, truncation=False)
# suggested_max = min(256, max(100, len(tokens) // 2))
# suggested_min = min(100, suggested_max - 20)
# final_summary = file_summarizer(
# combined_input,
# max_length=suggested_max,
# min_length=suggested_min,
# no_repeat_ngram_size=3,
# do_sample=False,
# )[0]["summary_text"]
# return final_summary