Seth0330 committed on
Commit
feb34d6
·
verified ·
1 Parent(s): 2fdf097

Delete main.py

Browse files
Files changed (1) hide show
  1. main.py +0 -39
main.py DELETED
@@ -1,39 +0,0 @@
1
- import PyPDF2
2
- import spacy
3
- from collections import Counter
4
- import heapq
5
- import io
6
-
7
# Load the spaCy pipeline once, at import time, from a path relative to the
# current working directory; the functions in this module share this object.
nlp = spacy.load("./en_core_web_sm-3.7.1")
9
-
10
def read_pdf(file_stream):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file_stream: The PDF source, handed unchanged to ``PyPDF2.PdfReader``
            (presumably a binary file-like object or a path -- confirm
            against the caller).

    Returns:
        The text of all pages joined with single spaces, with leading and
        trailing whitespace removed. An empty string if no page yields text.
    """
    reader = PyPDF2.PdfReader(file_stream)
    # A page without an extractable text layer may yield nothing; treat that
    # as an empty string so the join below never sees a non-string value.
    page_texts = [page.extract_text() or "" for page in reader.pages]
    # Joining once avoids rebuilding the accumulated string for every page.
    return " ".join(page_texts).strip()
16
-
17
def extract_key_phrases(text):
    """Collect candidate key phrases from *text*.

    The text is run through the module-level ``nlp`` pipeline. The result
    lists the text of every noun chunk first, followed by the text of every
    named entity. Duplicates are kept.

    Args:
        text: The string to analyse.

    Returns:
        A list of phrase strings (noun chunks, then entities).
    """
    parsed = nlp(text)
    candidates = []
    # Noun chunks come first, entities second, matching the documented order.
    candidates.extend(span.text for span in parsed.noun_chunks)
    candidates.extend(span.text for span in parsed.ents)
    return candidates
22
-
23
def score_sentences(text, key_phrases):
    """Score each sentence of *text* by how many key phrases it contains.

    The text is split into sentences by the module-level ``nlp`` pipeline.
    A sentence earns one point for every entry of ``key_phrases`` that occurs
    as a substring of the sentence text; repeated entries count repeatedly.

    Args:
        text: The string to split into sentences.
        key_phrases: Phrase strings to look for in each sentence.

    Returns:
        A dict mapping each sentence span with at least one match to its
        score. Sentences without any match are not included.
    """
    scores = {}
    for sentence in nlp(text).sents:
        sentence_text = sentence.text
        hits = sum(1 for phrase in key_phrases if phrase in sentence_text)
        # Only sentences that matched something get an entry.
        if hits:
            scores[sentence] = hits
    return scores
34
-
35
def summarize_text(sentence_scores, num_points=5):
    """Render the highest-scoring sentences as a bullet-point summary.

    Args:
        sentence_scores: Mapping from sentence objects (each exposing a
            ``.text`` string) to a numeric score.
        num_points: Maximum number of bullets to emit. Defaults to 5.

    Returns:
        A string with one ``"- <sentence>"`` line per selected sentence,
        ordered from highest to lowest score. An empty string if
        ``sentence_scores`` is empty.
    """
    top_sentences = heapq.nlargest(
        num_points, sentence_scores, key=sentence_scores.get
    )
    # Sentence text can carry embedded line breaks and runs of spaces;
    # collapse them so every bullet occupies exactly one output line.
    bullets = (f"- {' '.join(sent.text.split())}" for sent in top_sentences)
    return "\n".join(bullets)