Spaces:
Build error
Build error
Upload 8 Files
Browse files- .gitattributes +38 -35
- .gitignore +2 -0
- Dockerfile +6 -0
- README.md +12 -12
- Routes.py +113 -0
- app.py +20 -0
- requirements.txt +15 -0
- utils.py +30 -0
.gitattributes
CHANGED
|
@@ -1,35 +1,38 @@
|
|
| 1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
Models/Roberta-base-5_3k-epoch2/*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
Models/Roberta-base-5_3k-epoch2/*.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
Models/Roberta-base-5_3k-epoch2/*.txt filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
__pycache__
|
Dockerfile
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FROM python:3.10-slim
WORKDIR /app
COPY . .
RUN pip install --no-cache-dir -r requirements.txt
EXPOSE 7860
# BUG FIX: bind to 0.0.0.0 — uvicorn defaults to 127.0.0.1, which is
# unreachable from outside the container (Hugging Face Spaces probes
# port 7860 on all interfaces).
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: HallucinationDetectionServer
|
| 3 |
-
emoji: 🏃
|
| 4 |
-
colorFrom: yellow
|
| 5 |
-
colorTo: blue
|
| 6 |
-
sdk: gradio
|
| 7 |
-
sdk_version: 5.45.0
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
-
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: HallucinationDetectionServer
|
| 3 |
+
emoji: 🏃
|
| 4 |
+
colorFrom: yellow
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 5.45.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
Routes.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi.routing import APIRouter
from pydantic import BaseModel
from Pipeline.HallucinationPipeline import HallucinationPipeline
from Pipeline.CorrectionLLMs import DeepseekAPI
from nltk.tokenize import sent_tokenize
from utils import detectionProcess
import os


# SECURITY FIX: the DeepSeek/OpenRouter API key was hard-coded here and
# committed to source control — that key is leaked and must be revoked.
# Read it from the environment instead (Space secrets are exported as env
# vars; for local dev it comes from .env via load_dotenv() in app.py).
# NOTE(review): this is read at import time — ensure load_dotenv() runs
# before this module is imported when relying on a .env file.
deepseek_apikey = os.getenv("deepseek_apikey", "")


router = APIRouter(prefix="/api")
# Hallucination-detection model, loaded once at startup; CPU inference.
pipeline = HallucinationPipeline("Razor2507/Roberta-Base-Finetuned", "cpu")
# LLM client used by the /correct endpoint to rewrite hallucinated spans.
deepseek = DeepseekAPI(api_key=deepseek_apikey)

# Example of the pipeline output shape, for reference:
# {'predictions': [0],
# 'corrected_summary': [],
# 'sent_predicted': [array([2, 0, 0, 0])],
# 'factual_score': [0.21776947937905788],
# 'contradiction_score': [0.7815729230642319]}
# Detection Endpoint
class DetectionRequest(BaseModel):
    # Source article, candidate summary, and the arbiter checkbox value
    # ("on" when the UI toggle is enabled).
    article: str
    summary: str
    arbiter: str


@router.post("/detect")
def detect(data: DetectionRequest):
    """Run hallucination detection on an article/summary pair.

    Returns the tagged summary, factual score, label counts and a
    correction prompt (see utils.detectionProcess), plus a "status" key;
    any failure is reported as {"status": 404}.
    """
    try:
        # Flatten whitespace so the pipeline sees single-line inputs.
        cleaned_article = data.article.strip().replace("\n", " ").replace("\t", " ")
        cleaned_summary = data.summary.strip().replace("\n", " ").replace("\t", " ")
        use_arbiter = data.arbiter == "on"
        print("Arbiter : ", use_arbiter)
        payload = detectionProcess(
            article=cleaned_article,
            summary=cleaned_summary,
            pipeline=pipeline,
            arbiter=use_arbiter,
        )
        payload["status"] = 200
        return payload
    except Exception as e:
        print(e)
        return {"status": 404}
# Correction Endpoint
class correctionRequest(BaseModel):
    # Source article, the <xx>-tagged summary to fix, and the LLM name.
    article: str
    tag_summary: str
    model: str


@router.post("/correct")
def correct(data: correctionRequest):
    """Correct a hallucinated summary with an LLM, then re-run detection.

    Returns the detection payload for the corrected summary, plus the
    corrected text under "corrected_summary" and a "status" key; failures
    are reported as {"status": 404}.
    """
    try:
        # BUG FIX: previously "mistral" and "gemini" fell through with
        # `correction` never assigned, so every such request raised a
        # NameError that the broad except silently turned into a 404.
        # Reject unsupported models explicitly until they are wired up.
        if data.model == "deepseek":
            correction = deepseek.correct(premise=data.article, summary=data.tag_summary)
        else:
            return {"status": 400, "error": f"unsupported model: {data.model}"}

        print(correction)

        # BUG FIX: detectionProcess requires an `arbiter` argument; calling
        # it without one raised TypeError on every request. The correction
        # re-check runs without the arbiter pass.
        result = detectionProcess(
            article=data.article,
            summary=correction,
            pipeline=pipeline,
            arbiter=False,
        )
        result["corrected_summary"] = correction
        result["status"] = 200
        return result
    except Exception as e:
        print(e)
        return {"status": 404}
@router.get("/test")
def keyTest():
    """Health-check endpoint confirming the API key env var is readable.

    SECURITY FIX: the previous version printed the secret itself to the
    server logs; only report whether it is present.
    """
    print("Testing ", "key set" if os.getenv("deepseek_apikey") else "key missing")
    return {"msg": "testing_works"}
| 86 |
+
# @router.post("/detect")
|
| 87 |
+
# def detect(data:DetectionRequest):
|
| 88 |
+
# article=data.article.strip().replace("\n","").replace("\t"," ")
|
| 89 |
+
# summary=data.summary.strip().replace("\n","").replace("\t"," ")
|
| 90 |
+
|
| 91 |
+
# result=pipeline.process([[article,summary]],correct_the_summary=False)
|
| 92 |
+
# all_sentences=sent_tokenize(summary)
|
| 93 |
+
# print(result)
|
| 94 |
+
# summary=pipeline.addTags(all_sentences,result["sent_predicted"][0],len(all_sentences))
|
| 95 |
+
# score=str(result["factual_score"][0])
|
| 96 |
+
# sentenceLabels=list(result["sent_predicted"][0])
|
| 97 |
+
# labelCounts=[sentenceLabels.count(0),sentenceLabels.count(2)]
|
| 98 |
+
|
| 99 |
+
# prompt=f"""
|
| 100 |
+
# Here is a summary with hallucinated parts marked using <xx> tags.
|
| 101 |
+
|
| 102 |
+
# Please correct only the text inside the <xx> tags to make it factually accurate based on the original article. Leave the rest of the summary unchanged and remove the <xx> tags after correction.
|
| 103 |
+
|
| 104 |
+
# Return the summary with hallucinated parts fixed and you can remove those <xx></xx> tags. Don't remove that entire sentence.
|
| 105 |
+
|
| 106 |
+
# Original Article:
|
| 107 |
+
# {data.article}
|
| 108 |
+
|
| 109 |
+
# Summary:
|
| 110 |
+
# {summary}
|
| 111 |
+
|
| 112 |
+
# """
|
| 113 |
+
# return {"summary":summary,"score":score,"counts":labelCounts,"copy_prompt":prompt}
|
app.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from dotenv import load_dotenv

# BUG FIX: load .env BEFORE importing Routes — Routes builds its pipeline
# and API client at import time, so any config it reads from the
# environment must already be loaded.
load_dotenv()

from Routes import router


app = FastAPI()
app.include_router(router)

# BUG FIX: the previous config combined allow_origins=["*"] with
# allow_credentials=True, which browsers reject (the CORS spec forbids a
# wildcard origin on credentialed responses), so cross-origin calls
# failed. This API uses no cookies, so drop credentials and allow all
# headers for preflighted JSON requests.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Serve the built frontend bundle (e.g. Vite "dist" output).
app.mount("/assets", StaticFiles(directory="dist/assets"), name="assets")


# SPA catch-all: any path not matched above returns the frontend shell so
# client-side routing works on refresh/deep links.
@app.get("/{full_path:path}")
async def main():
    return FileResponse("dist/index.html")
requirements.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.111.0
|
| 2 |
+
uvicorn==0.29.0
|
| 3 |
+
pydantic==2.1.1
|
| 4 |
+
python-dotenv==1.0.1
|
| 5 |
+
requests==2.32.3
|
| 6 |
+
nltk==3.9.1
|
| 7 |
+
transformers==4.53.2
|
| 8 |
+
# "+cu121" local-version wheels are not hosted on PyPI, so this pin breaks
# `pip install` in the Docker build; use the default PyPI build instead.
torch==2.5.1
|
| 9 |
+
xgboost==3.0.0
|
| 10 |
+
scikit-learn==1.5.1
|
| 11 |
+
numpy==1.26.4
|
| 12 |
+
pandas==2.2.3
|
| 13 |
+
openai==1.3.7
|
| 14 |
+
google-generativeai==0.8.5
|
| 15 |
+
llama-cpp-python==0.3.12
|
utils.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from nltk.tokenize import sent_tokenize


# This helper is shared by Detection (/detect) and Correction (/correct):
# it runs the pipeline, tags hallucinated sentences, and builds a
# copy-paste prompt for manual LLM correction.

def detectionProcess(article, summary, pipeline, arbiter=False):
    """Run hallucination detection and package the API response payload.

    Parameters:
        article: source article text (single line, whitespace-flattened).
        summary: candidate summary to check.
        pipeline: loaded HallucinationPipeline instance.
        arbiter: whether to enable the arbiter pass. BUG FIX: this
            parameter previously had no default, so callers that omitted
            it (the /correct endpoint) crashed with TypeError; defaulting
            to False is backward compatible for callers that pass it.

    Returns a dict with the <xx>-tagged summary, factual score (as str),
    [faithful, hallucinated] sentence counts, and a correction prompt.
    """
    result = pipeline.process([[article, summary]], correct_the_summary=False, arbiter=arbiter)
    all_sentences = sent_tokenize(summary)
    print(result)
    # Wrap sentences predicted as hallucinated in <xx></xx> tags.
    summary = pipeline.addTags(all_sentences, result["sent_predicted"][0], len(all_sentences))
    score = str(result["factual_score"][0])
    sentenceLabels = list(result["sent_predicted"][0])
    # counts[0] = label 0 (faithful), counts[1] = label 2 (hallucinated).
    labelCounts = [sentenceLabels.count(0), sentenceLabels.count(2)]

    prompt = f"""
    Here is a summary with hallucinated parts marked using <xx> tags.

    Please correct only the text inside the <xx> tags to make it factually accurate based on the original article. Leave the rest of the summary unchanged and remove the <xx> tags after correction.

    Return the summary with hallucinated parts fixed and you can remove those <xx></xx> tags. Don't remove that entire sentence.

    Original Article:
    {article}

    Summary:
    {summary}

    """
    return {"summary": summary, "score": score, "counts": labelCounts, "copy_prompt": prompt}