Linseypass committed on
Commit
e4349f5
·
1 Parent(s): 3645d8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -35
app.py CHANGED
@@ -56,17 +56,16 @@ def generate(title, abstract):
56
  This is for summarization
57
  '''
58
  tooShortForKeyword = False
59
- with open("data/sample-data.jsonl", "r") as f:
60
- obj = [json.loads(l) for l in f]
61
- doc = ""
62
- if len(obj[0]["target"]) > 1:
63
- doc += obj[0]["title"] + ". " + obj[0]["target"][0] + " " + obj[0]["target"][1]
64
- elif len(obj[0]["target"]) == 1:
65
- tooShortForKeyword = True
66
- doc += obj[0]["title"] + ". " + obj[0]["target"][0]
67
- else:
68
- tooShortForKeyword = True
69
- doc += obj[0]["title"]
70
  text = doc
71
  prompt = """
72
  Can you explain the main idea of what is being studied in the following paragraph for someone who is not familiar with the topic. Comment on areas of application.:
@@ -85,8 +84,6 @@ def generate(title, abstract):
85
  index_response += 10
86
  end_response = output.rfind('.') + 1
87
  response = output[index_response:end_response]
88
- with open("data/guanacoSummaryOutput.txt", "w") as f2:
89
- f2.write(response)
90
  print('Plain Language Summary Created.')
91
 
92
  '''
@@ -94,26 +91,25 @@ def generate(title, abstract):
94
  '''
95
  # the document is the title and first two sentences of the abstract.
96
 
97
- with open("data/sample-data.jsonl", "r") as f:
98
- obj = [json.loads(l) for l in f]
99
- doc = ""
100
- if len(obj[0]["target"]) > 1:
101
- doc += obj[0]["title"] + ". " + obj[0]["target"][0] + " " + obj[0]["target"][1]
102
- kw_model = KeyBERT(model="all-MiniLM-L6-v2")
103
- vectorizer = KeyphraseCountVectorizer()
104
- top_n = 2
105
- keywords = kw_model.extract_keywords(doc, stop_words="english", top_n = top_n, vectorizer=vectorizer, use_mmr=True)
106
- my_keywords = []
107
- for i in range(top_n):
108
- add = True
109
- for j in range(top_n):
110
- if i != j:
111
- if keywords[i][0] in keywords[j][0]:
112
- add = False
113
- if add:
114
- my_keywords.append(keywords[i][0])
115
- for entry in my_keywords:
116
- print(entry)
117
  '''
118
  This is for feeding the keyphrases into Guanaco.
119
  '''
@@ -136,8 +132,6 @@ def generate(title, abstract):
136
  index_response = output.find("### Assistant: ") + 15
137
  end_response = output.rfind('.') + 1
138
  responseTwo = output[index_response:end_response]
139
- with open("data/guanacoElaborationOutput.txt", "w") as f2:
140
- f2.write(responseTwo)
141
  print('Keyphrase elaboration ran.')
142
  return keyword_string, responseTwo, response
143
 
 
56
  This is for summarization
57
  '''
58
  tooShortForKeyword = False
59
+ obj = newline
60
+ doc = ""
61
+ if len(obj["target"]) > 1:
62
+ doc += obj["title"] + ". " + obj["target"][0] + " " + obj["target"][1]
63
+ elif len(obj["target"]) == 1:
64
+ tooShortForKeyword = True
65
+ doc += obj["title"] + ". " + obj["target"][0]
66
+ else:
67
+ tooShortForKeyword = True
68
+ doc += obj["title"]
 
69
  text = doc
70
  prompt = """
71
  Can you explain the main idea of what is being studied in the following paragraph for someone who is not familiar with the topic. Comment on areas of application.:
 
84
  index_response += 10
85
  end_response = output.rfind('.') + 1
86
  response = output[index_response:end_response]
 
 
87
  print('Plain Language Summary Created.')
88
 
89
  '''
 
91
  '''
92
  # the document is the title and first two sentences of the abstract.
93
 
94
+ obj = newline
95
+ doc = ""
96
+ if len(obj["target"]) > 1:
97
+ doc += obj["title"] + ". " + obj["target"][0] + " " + obj["target"][1]
98
+ kw_model = KeyBERT(model="all-MiniLM-L6-v2")
99
+ vectorizer = KeyphraseCountVectorizer()
100
+ top_n = 2
101
+ keywords = kw_model.extract_keywords(doc, stop_words="english", top_n = top_n, vectorizer=vectorizer, use_mmr=True)
102
+ my_keywords = []
103
+ for i in range(top_n):
104
+ add = True
105
+ for j in range(top_n):
106
+ if i != j:
107
+ if keywords[i][0] in keywords[j][0]:
108
+ add = False
109
+ if add:
110
+ my_keywords.append(keywords[i][0])
111
+ for entry in my_keywords:
112
+ print(entry)
 
113
  '''
114
  This is for feeding the keyphrases into Guanaco.
115
  '''
 
132
  index_response = output.find("### Assistant: ") + 15
133
  end_response = output.rfind('.') + 1
134
  responseTwo = output[index_response:end_response]
 
 
135
  print('Keyphrase elaboration ran.')
136
  return keyword_string, responseTwo, response
137