Spaces:
Runtime error
Runtime error
| from flask import Flask, request | |
| from transformers import RobertaForSequenceClassification, RobertaTokenizer, RobertaConfig | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| from transformers import RobertaConfig | |
| from torch import cuda | |
| import torch | |
| import gradio as gr | |
| import os | |
| import re | |
| import pdfplumber | |
# Flask application object. Nothing in the visible part of this file registers
# routes on it; it is kept so that code importing `app` keeps working.
app = Flask(__name__)

# Hugging Face Hub access token. Read with .get() so that an unset variable
# does not abort start-up with a KeyError; when it is None the library falls
# back to its default authentication behaviour.
ACCESS_TOKEN = os.environ.get("ACCESS_TOKEN")

# Hub repository that provides both the tokenizer and the classifier weights.
MODEL_NAME = "PirateXX/AI-Content-Detector"

# Alternative RoBERTa-specific loading path (disabled):
# config = RobertaConfig.from_pretrained("PirateXX/ChatGPT-Text-Detector", use_auth_token= ACCESS_TOKEN)
# model = RobertaForSequenceClassification.from_pretrained("PirateXX/ChatGPT-Text-Detector", use_auth_token= ACCESS_TOKEN, config = config)
# model_name = "roberta-base"
# tokenizer = RobertaTokenizer.from_pretrained(model_name, map_location=torch.device('cpu'))

# Tokenizer and sequence-classification model shared by every prediction.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=ACCESS_TOKEN)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, use_auth_token=ACCESS_TOKEN)
# Sentence boundary: a run of one or more spaces that
#   * is immediately preceded by three characters of the form
#     <not an uppercase A-Z letter><any character><'.' or '?'>, and
#   * is immediately followed by an uppercase A-Z letter.
# The lookbehind presumably avoids splitting after short capitalised
# abbreviations such as "Mr." (review note, not verified against real data).
_SENTENCE_BOUNDARY = re.compile(r'(?<=[^A-Z].[.?]) +(?=[A-Z])')


def text_to_sentences(text):
    """Break *text* into a list of sentences.

    The text is split at every match of ``_SENTENCE_BOUNDARY``; the spaces
    that form the boundary are dropped and the terminating punctuation stays
    attached to the preceding sentence. Text containing no boundary (the
    empty string included) comes back as a single-element list.
    """
    return _SENTENCE_BOUNDARY.split(text)
def chunks_of_900(text, chunk_size=900):
    """Group the sentences of *text* into chunks of at most *chunk_size* characters.

    Sentences come from ``text_to_sentences`` and are joined with a single
    space. A sentence is added to the current chunk only if the chunk, the
    joining space and the sentence together fit within *chunk_size*;
    otherwise the current chunk is closed and the sentence starts a new one.

    Args:
        text: The text to split.
        chunk_size: Maximum chunk length in characters (default 900).

    Returns:
        A list of non-empty chunk strings, in original order. The list is
        empty when *text* is empty. A single sentence longer than
        *chunk_size* is not split; it becomes an oversized chunk of its own.
    """
    chunks = []
    current_chunk = ""
    for sentence in text_to_sentences(text):
        if not current_chunk:
            # Nothing buffered yet: start the chunk with this sentence. Doing
            # this unconditionally avoids emitting an empty chunk when the
            # very first sentence is already longer than chunk_size.
            current_chunk = sentence
        elif len(current_chunk) + 1 + len(sentence) <= chunk_size:
            # The "+ 1" accounts for the joining space, so the finished chunk
            # really stays within chunk_size.
            current_chunk += " " + sentence
        else:
            chunks.append(current_chunk)
            current_chunk = sentence
    if current_chunk:
        chunks.append(current_chunk)
    return chunks
def predict(query, device="cpu"):
    """Return the classifier's probability that *query* is "real" text.

    The query is tokenized with the module-level ``tokenizer``, truncated so
    that it fits the tokenizer's ``model_max_length`` once the BOS and EOS
    tokens are added, and passed through the module-level ``model``.

    Args:
        query: The text to classify.
        device: Device the input tensors are moved to. This function does not
            move ``model`` itself, so the model must already live on that
            device.

    Returns:
        The second of the two softmax probabilities, as a Python float.
        NOTE(review): this assumes the model has exactly two labels ordered
        (fake, real), as the unpacking below implies - confirm against the
        model configuration.
    """
    # Encode WITHOUT special tokens: BOS/EOS are added explicitly below, after
    # truncation. Letting encode() add them as well would duplicate them and,
    # for long inputs, the truncation would cut off the encoder's own EOS.
    tokens = tokenizer.encode(query, add_special_tokens=False)
    # Reserve two positions for the BOS and EOS tokens.
    tokens = tokens[:tokenizer.model_max_length - 2]
    tokens = torch.tensor(
        [tokenizer.bos_token_id] + tokens + [tokenizer.eos_token_id]
    ).unsqueeze(0)  # add the batch dimension
    mask = torch.ones_like(tokens)  # no padding, so attend to every position
    with torch.no_grad():
        logits = model(tokens.to(device), attention_mask=mask.to(device))[0]
        probs = logits.softmax(dim=-1)
    fake, real = probs.detach().cpu().flatten().numpy().tolist()
    return real
def findRealProb(text):
    """Estimate how likely *text* is "real" by averaging per-chunk predictions.

    The text is split with ``chunks_of_900``; each non-empty chunk is scored
    with ``predict`` and the scores are combined into an average weighted by
    chunk length in characters.

    Args:
        text: The text to analyse.

    Returns:
        On success, a dict with the keys ``"Real"`` (weighted average of the
        per-chunk probabilities), ``"Fake"`` (``1 - Real``), ``"results"``
        (list of ``[probability, chunk_length]`` pairs) and ``"text"`` (the
        input text).
        When there is no text to score, a dict with an ``"error"`` message
        plus the ``"results"`` and ``"text"`` keys, instead of failing with a
        ZeroDivisionError.
    """
    results = []
    weighted_sum = 0
    total_length = 0
    for chunk in chunks_of_900(text):
        if not chunk:
            # An empty chunk carries zero weight; skip the model call.
            continue
        prob = predict(chunk)
        results.append([prob, len(chunk)])
        weighted_sum += prob * len(chunk)
        total_length += len(chunk)

    if total_length == 0:
        # Happens e.g. for an empty string: there is nothing to average over.
        return {"error": "No text found to analyze", "results": results, "text": text}

    realProb = weighted_sum / total_length
    return {"Real": realProb, "Fake": 1 - realProb, "results": results, "text": text}
# Number of leading PDF pages whose text is analysed (the original loop
# stopped after the sixth page).
_MAX_PDF_PAGES = 6


def upload_file(file):
    """Extract text from an uploaded PDF and score it with ``findRealProb``.

    Args:
        file: The uploaded file. Either an object whose ``name`` attribute is
            the path of the PDF on disk, or the path itself.

    Returns:
        The dict produced by ``findRealProb`` for the extracted text, or
        ``{"error": ...}`` when no file was supplied.
    """
    if not file:
        return {"error":'No PDF file found in request'}

    # Accept both a file-like wrapper exposing `.name` and a plain path.
    pdf_file = getattr(file, "name", file)

    page_texts = []
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages[:_MAX_PDF_PAGES]:
            # extract_text() may yield None/"" for pages without a text layer;
            # skip those instead of failing on string concatenation.
            page_text = page.extract_text(x_tolerance=1)
            if page_text:
                page_texts.append(page_text)

    # Join pages with a space so the last word of one page is not fused with
    # the first word of the next, then flatten line breaks.
    text = " ".join(page_texts).replace('\n', ' ')
    return findRealProb(text)
# Gradio UI: a single file-upload input whose result (the dict returned by
# upload_file) is rendered as JSON.
demo = gr.Interface(
    fn=upload_file,
    inputs=gr.File(),
    # gr.JSON is the top-level component; the legacy `gr.outputs` namespace
    # and the `interpretation` argument no longer exist in Gradio 4.
    outputs=gr.JSON(),
    article = "Visit <a href = \"https://ai-content-detector.online/\">AI Content Detector</a> for better user experience!",
)
# Start the web server; show_api=False is passed through to launch().
demo.launch(show_api=False)