holcombzv committed on
Commit
4e20c0f
·
1 Parent(s): e5f1968

Class architecture introduced.

Browse files
Files changed (3) hide show
  1. app.py +4 -8
  2. classes.py +39 -0
  3. functions.py +6 -0
app.py CHANGED
@@ -7,6 +7,7 @@ import pickle
7
  import logging
8
 
9
  from functions import *
 
10
 
11
  logging.basicConfig(level=logging.INFO)
12
  logger = logging.getLogger(__name__)
@@ -38,16 +39,11 @@ class PredictionResponse(BaseModel):
38
  label: str
39
 
40
  @app.post("/predict", response_model=PredictionResponse)
41
- async def predict(request: TextRequest):
42
  try:
43
  html = request.text
44
- text = get_article_text(html)
45
- logger.info("Received text length:", len(text))
46
- text_clean = clean_text(text)
47
- logger.info("Cleaned text length:", len(text_clean))
48
- score = evaluate_text(text_clean, model, tokenizer)
49
- logger.info("Model score:", score)
50
-
51
  label = 'Real' if score > .5 else 'Fake'
52
 
53
  return {
 
7
  import logging
8
 
9
  from functions import *
10
+ from classes import *
11
 
12
  logging.basicConfig(level=logging.INFO)
13
  logger = logging.getLogger(__name__)
 
39
  label: str
40
 
41
  @app.post("/predict", response_model=PredictionResponse)
42
+ async def evaluate_article(request: TextRequest):
43
  try:
44
  html = request.text
45
+ article = Article(1, html)
46
+ score = article.evaluate_article(model, tokenizer)
 
 
 
 
 
47
  label = 'Real' if score > .5 else 'Fake'
48
 
49
  return {
classes.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functions import *
2
+
3
class Paragraph:
    """One paragraph of an article, remembered by position.

    Attributes:
        text: the paragraph's text.
        article_id: identifier of the article this paragraph came from.
        index: position of the paragraph within its article.
    """

    def __init__(self, text: str, article_id: int, index: int):
        self.text = text
        self.article_id = article_id
        self.index = index

    def __str__(self) -> str:
        # A paragraph prints as its own text.
        return self.text

    def evaluate_paragraph(self, model, tokenizer):
        """Score this paragraph's text with the supplied model and tokenizer."""
        return evaluate_text(self.text, model, tokenizer)
14
+
15
class Article:
    """An article parsed from HTML and split into scored paragraphs.

    Attributes:
        article_id: identifier supplied by the caller.
        html: the raw HTML the article was built from.
        text: plain text extracted from the HTML ('' if extraction failed).
        text_length: whole-article word count (split on single spaces).
        paragraphs: list of Paragraph objects, one per line of text.
    """

    def __init__(self, article_id: int, html: str):
        self.article_id = article_id
        self.html = html
        # get_article_text may return None on failure; normalize to ''.
        self.text = get_article_text(self.html) or ''
        self.text_length = len(self.text.split(' '))
        # Wrap EVERY paragraph in a Paragraph object. The previous loop
        # started at index 1, leaving paragraphs[0] a bare str that later
        # crashed evaluate_article with an AttributeError.
        raw_paragraphs = split_paragraphs(self.text) or []
        self.paragraphs = [
            Paragraph(paragraph_text, self.article_id, i)
            for i, paragraph_text in enumerate(raw_paragraphs)
        ]

    def __str__(self):
        return str(self.article_id)

    def evaluate_article(self, model, tokenizer):
        """Return the word-count-weighted average of paragraph scores.

        Each paragraph's score is weighted by its word count, and the sum
        is divided by the TOTAL weight (previously it was divided by the
        whole-article word count, which does not equal the sum of the
        per-paragraph word counts, skewing the average).
        """
        weighted_sum = 0.0
        total_weight = 0
        for paragraph in self.paragraphs:
            weight = len(paragraph.text.split(' '))
            weighted_sum += paragraph.evaluate_paragraph(model, tokenizer) * weight
            total_weight += weight
        # Guard: an article with no paragraphs has no meaningful score.
        if total_weight == 0:
            return 0.0
        return weighted_sum / total_weight
functions.py CHANGED
@@ -26,6 +26,12 @@ def get_article_text(html_text):
26
  except Exception as e:
27
  logger.exception(f'Error: Could not retrieve article text: {e}')
28
 
 
 
 
 
 
 
29
  def clean_text(text):
30
  try:
31
  text = text.lower()
 
26
  except Exception as e:
27
  logger.exception(f'Error: Could not retrieve article text: {e}')
28
 
29
def split_paragraphs(text: str):
    """Split article text into one cleaned paragraph per line.

    Returns a list with one entry per line of *text*, each passed through
    clean_text. The previous implementation rebound its loop variable
    (`paragraph = clean_text(paragraph)`), which discarded every cleaned
    value and returned the raw, uncleaned lines.
    """
    return [clean_text(line) for line in text.splitlines()]
34
+
35
  def clean_text(text):
36
  try:
37
  text = text.lower()