holcombzv committed on
Commit
4e20c0f
·
1 Parent(s): e5f1968

Class architecture introduced.

Browse files
Files changed (3) hide show
  1. app.py +4 -8
  2. classes.py +39 -0
  3. functions.py +6 -0
app.py CHANGED
@@ -7,6 +7,7 @@ import pickle
7
  import logging
8
 
9
  from functions import *
 
10
 
11
  logging.basicConfig(level=logging.INFO)
12
  logger = logging.getLogger(__name__)
@@ -38,16 +39,11 @@ class PredictionResponse(BaseModel):
38
  label: str
39
 
40
  @app.post("/predict", response_model=PredictionResponse)
41
- async def predict(request: TextRequest):
42
  try:
43
  html = request.text
44
- text = get_article_text(html)
45
- logger.info("Received text length:", len(text))
46
- text_clean = clean_text(text)
47
- logger.info("Cleaned text length:", len(text_clean))
48
- score = evaluate_text(text_clean, model, tokenizer)
49
- logger.info("Model score:", score)
50
-
51
  label = 'Real' if score > .5 else 'Fake'
52
 
53
  return {
 
7
  import logging
8
 
9
  from functions import *
10
+ from classes import *
11
 
12
  logging.basicConfig(level=logging.INFO)
13
  logger = logging.getLogger(__name__)
 
39
  label: str
40
 
41
  @app.post("/predict", response_model=PredictionResponse)
42
+ async def evaluate_article(request: TextRequest):
43
  try:
44
  html = request.text
45
+ article = Article(1, html)
46
+ score = article.evaluate_article(model, tokenizer)
 
 
 
 
 
47
  label = 'Real' if score > .5 else 'Fake'
48
 
49
  return {
classes.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functions import *
2
+
3
class Paragraph:
    """One paragraph of an article, remembered by position.

    Attributes:
        text: the paragraph's text.
        article_id: identifier of the article this paragraph came from.
        index: position of the paragraph within its article.
    """

    def __init__(self, text: str, article_id: int, index: int):
        self.text = text
        self.article_id = article_id
        self.index = index

    def __str__(self) -> str:
        # A paragraph prints as its own text.
        return self.text

    def evaluate_paragraph(self, model, tokenizer):
        """Score this paragraph's text with the supplied model and tokenizer."""
        return evaluate_text(self.text, model, tokenizer)
14
+
15
class Article:
    """An article parsed from HTML and split into scored paragraphs.

    Attributes:
        article_id: identifier supplied by the caller.
        html: the raw HTML the article was built from.
        text: plain text extracted from the HTML ('' if extraction failed).
        text_length: whole-article word count (split on single spaces).
        paragraphs: list of Paragraph objects, one per line of text.
    """

    def __init__(self, article_id: int, html: str):
        self.article_id = article_id
        self.html = html
        # get_article_text may return None on failure; normalize to ''.
        self.text = get_article_text(self.html) or ''
        self.text_length = len(self.text.split(' '))
        # Wrap EVERY paragraph in a Paragraph object. The previous loop
        # started at index 1, leaving paragraphs[0] a bare str that later
        # crashed evaluate_article with an AttributeError.
        raw_paragraphs = split_paragraphs(self.text) or []
        self.paragraphs = [
            Paragraph(paragraph_text, self.article_id, i)
            for i, paragraph_text in enumerate(raw_paragraphs)
        ]

    def __str__(self):
        return str(self.article_id)

    def evaluate_article(self, model, tokenizer):
        """Return the word-count-weighted average of paragraph scores.

        Each paragraph's score is weighted by its word count, and the sum
        is divided by the TOTAL weight (previously it was divided by the
        whole-article word count, which does not equal the sum of the
        per-paragraph word counts, skewing the average).
        """
        weighted_sum = 0.0
        total_weight = 0
        for paragraph in self.paragraphs:
            weight = len(paragraph.text.split(' '))
            weighted_sum += paragraph.evaluate_paragraph(model, tokenizer) * weight
            total_weight += weight
        # Guard: an article with no paragraphs has no meaningful score.
        if total_weight == 0:
            return 0.0
        return weighted_sum / total_weight
functions.py CHANGED
@@ -26,6 +26,12 @@ def get_article_text(html_text):
26
  except Exception as e:
27
  logger.exception(f'Error: Could not retrieve article text: {e}')
28
 
 
 
 
 
 
 
29
  def clean_text(text):
30
  try:
31
  text = text.lower()
 
26
  except Exception as e:
27
  logger.exception(f'Error: Could not retrieve article text: {e}')
28
 
29
def split_paragraphs(text: str):
    """Split article text into one cleaned paragraph per line.

    Returns a list with one entry per line of *text*, each passed through
    clean_text. The previous implementation rebound its loop variable
    (`paragraph = clean_text(paragraph)`), which discarded every cleaned
    value and returned the raw, uncleaned lines.
    """
    return [clean_text(line) for line in text.splitlines()]
34
+
35
  def clean_text(text):
36
  try:
37
  text = text.lower()