Razor2507 committed on
Commit
915cbc7
·
verified ·
1 Parent(s): eac491b

Upload 8 Files

Browse files
Files changed (8) hide show
  1. .gitattributes +38 -35
  2. .gitignore +2 -0
  3. Dockerfile +6 -0
  4. README.md +12 -12
  5. Routes.py +113 -0
  6. app.py +20 -0
  7. requirements.txt +15 -0
  8. utils.py +30 -0
.gitattributes CHANGED
@@ -1,35 +1,38 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Models/Roberta-base-5_3k-epoch2/*.safetensors filter=lfs diff=lfs merge=lfs -text
37
+ Models/Roberta-base-5_3k-epoch2/*.json filter=lfs diff=lfs merge=lfs -text
38
+ Models/Roberta-base-5_3k-epoch2/*.txt filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .env
2
+ __pycache__
Dockerfile ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
FROM python:3.10-slim
WORKDIR /app
# Install dependencies before copying the sources so this layer stays cached
# until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
EXPOSE 7860
# Bind to all interfaces: uvicorn listens on 127.0.0.1 by default, which is
# not reachable from outside the container.
CMD ["uvicorn","app:app","--host","0.0.0.0","--port","7860"]
README.md CHANGED
@@ -1,12 +1,12 @@
1
- ---
2
- title: HallucinationDetectionServer
3
- emoji: 🏃
4
- colorFrom: yellow
5
- colorTo: blue
6
- sdk: gradio
7
- sdk_version: 5.45.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
---
title: HallucinationDetectionServer
emoji: 🏃
colorFrom: yellow
colorTo: blue
# The app is a FastAPI server started by the Dockerfile (uvicorn on port 7860),
# so the Space must use the Docker SDK rather than Gradio.
sdk: docker
app_port: 7860
pinned: false
---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
Routes.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi.routing import APIRouter
2
+ from pydantic import BaseModel
3
+ from Pipeline.HallucinationPipeline import HallucinationPipeline
4
+ from Pipeline.CorrectionLLMs import DeepseekAPI
5
+ from nltk.tokenize import sent_tokenize
6
+ from utils import detectionProcess
7
+ import os
8
+
9
+
10
# SECURITY: the DeepSeek/OpenRouter key used to be hard-coded on this line and
# is therefore exposed in the repository history -- revoke and rotate it.
# The key is now read from the same environment variable that /api/test
# inspects. It has to be set before this module is imported: app.py only calls
# load_dotenv() after `from Routes import router` has already run.
deepseek_apikey = os.getenv("deepseek_apikey", "")
if not deepseek_apikey:
    print("Warning: deepseek_apikey is not set; the DeepSeek client has no API key")


# Shared, module-level objects used by every endpoint below.
router = APIRouter(prefix="/api")
pipeline = HallucinationPipeline("Razor2507/Roberta-Base-Finetuned", "cpu")
deepseek = DeepseekAPI(api_key=deepseek_apikey)
16
+
17
+ # {'predictions': [0],
18
+ # 'corrected_summary': [],
19
+ # 'sent_predicted': [array([2, 0, 0, 0])],
20
+ # 'factual_score': [0.21776947937905788],
21
+ # 'contradiction_score': [0.7815729230642319]}
22
+
23
+
24
+
25
+ # Detection Endpoint
26
class DetectionRequest(BaseModel):
    """Request body accepted by the POST /api/detect endpoint."""

    article: str  # text the summary is checked against
    summary: str  # summary to run detection on
    arbiter: str  # the endpoint treats the literal "on" as enabled
30
+
31
@router.post("/detect")
def detect(data: DetectionRequest):
    """Run hallucination detection on an article/summary pair.

    Both texts are trimmed and have their newlines and tabs replaced by
    spaces before being handed to ``detectionProcess``.

    Returns the ``detectionProcess`` result with ``"status": 200`` added, or
    ``{"status": 404}`` when anything raises.
    """
    # Maps "\n" and "\t" to a single space each.
    whitespace_to_space = str.maketrans("\n\t", "  ")
    try:
        article = data.article.strip().translate(whitespace_to_space)
        summary = data.summary.strip().translate(whitespace_to_space)
        arbiter = data.arbiter == "on"
        print("Arbiter : ", arbiter)
        result = detectionProcess(
            article=article,
            summary=summary,
            pipeline=pipeline,
            arbiter=arbiter,
        )
        result["status"] = 200
        return result
    except Exception as e:
        print(e)
        return {"status": 404}
44
+
45
+
46
+
47
+ # Correction Endpoint
48
class correctionRequest(BaseModel):
    """Request body accepted by the POST /api/correct endpoint."""

    article: str  # passed to the correction model as the premise
    tag_summary: str  # summary handed to the correction model
    model: str  # name of the correction backend, e.g. "deepseek"
52
+
53
@router.post("/correct")
def correct(data: correctionRequest):
    """Correct a tagged summary with the selected model and re-run detection.

    Only the "deepseek" backend is implemented. Any other model name
    (including the placeholder names "mistral" and "gemini") is rejected with
    the same ``"status": 404`` payload that was returned for them before,
    now with an explanatory ``"error"`` message.

    Returns the ``detectionProcess`` result for the corrected summary with
    ``"corrected_summary"`` and ``"status": 200`` added, or ``{"status": 404}``
    when anything raises.
    """
    try:
        if data.model != "deepseek":
            # Previously these branches fell through with `correction` unbound,
            # which surfaced as an opaque UnboundLocalError in the logs.
            return {
                "status": 404,
                "error": f"Correction model '{data.model}' is not supported",
            }

        correction = deepseek.correct(premise=data.article, summary=data.tag_summary)
        print(correction)

        # `arbiter` must be passed: detectionProcess declares it as a required
        # parameter, and omitting it made every call here raise TypeError.
        # The request has no arbiter field, so it is disabled.
        result = detectionProcess(
            article=data.article,
            summary=correction,
            pipeline=pipeline,
            arbiter=False,
        )
        result["corrected_summary"] = correction
        result["status"] = 200
        return result
    except Exception as e:
        print(e)
        return {"status": 404}
73
+
74
+
75
@router.get("/test")
def keyTest():
    """Smoke-test endpoint that also logs whether the API key is configured."""
    # Log only whether the key is present, never its value: printing the
    # secret would leak it into the server logs.
    key_state = "is set" if os.getenv("deepseek_apikey") else "is NOT set"
    print("Testing ", "deepseek_apikey", key_state)
    return {"msg": "testing_works"}
79
+
80
+
81
+
82
+
83
+
84
+
85
+
86
+ # @router.post("/detect")
87
+ # def detect(data:DetectionRequest):
88
+ # article=data.article.strip().replace("\n","").replace("\t"," ")
89
+ # summary=data.summary.strip().replace("\n","").replace("\t"," ")
90
+
91
+ # result=pipeline.process([[article,summary]],correct_the_summary=False)
92
+ # all_sentences=sent_tokenize(summary)
93
+ # print(result)
94
+ # summary=pipeline.addTags(all_sentences,result["sent_predicted"][0],len(all_sentences))
95
+ # score=str(result["factual_score"][0])
96
+ # sentenceLabels=list(result["sent_predicted"][0])
97
+ # labelCounts=[sentenceLabels.count(0),sentenceLabels.count(2)]
98
+
99
+ # prompt=f"""
100
+ # Here is a summary with hallucinated parts marked using <xx> tags.
101
+
102
+ # Please correct only the text inside the <xx> tags to make it factually accurate based on the original article. Leave the rest of the summary unchanged and remove the <xx> tags after correction.
103
+
104
+ # Return the summary with hallucinated parts fixed and you can remove those <xx></xx> tags. Don't remove that entire sentence.
105
+
106
+ # Original Article:
107
+ # {data.article}
108
+
109
+ # Summary:
110
+ # {summary}
111
+
112
+ # """
113
+ # return {"summary":summary,"score":score,"counts":labelCounts,"copy_prompt":prompt}
app.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from Routes import router
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from fastapi.staticfiles import StaticFiles
5
+ from fastapi.responses import FileResponse
6
+ from dotenv import load_dotenv
7
+
8
+
9
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()


app = FastAPI()
app.include_router(router)
# Wildcard origins must not be combined with allow_credentials=True: the
# FastAPI CORS documentation requires explicit origins in that case. Nothing
# in this app uses cookies, so credentials are disabled.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_credentials=False,
)
# Static front-end bundle.
app.mount("/assets", StaticFiles(directory="dist/assets"), name="assets")
16
+
17
+
18
@app.get("/{full_path:path}")
async def main():
    """Serve the front-end entry page for any GET path not matched earlier."""
    index_page = "dist/index.html"
    return FileResponse(index_page)
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
fastapi==0.111.0
uvicorn==0.29.0
pydantic==2.1.1
python-dotenv==1.0.1
requests==2.32.3
nltk==3.9.1
transformers==4.53.2
# "2.5.1+cu121" is a local-version build that is not published on PyPI, so a
# plain `pip install -r requirements.txt` (as the Dockerfile runs) cannot
# resolve it. The server loads its model on "cpu", so the default build is used.
torch==2.5.1
xgboost==3.0.0
scikit-learn==1.5.1
numpy==1.26.4
pandas==2.2.3
openai==1.3.7
google-generativeai==0.8.5
llama-cpp-python==0.3.12
utils.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from nltk.tokenize import sent_tokenize
2
+
3
+
4
+ # This method is created because it will be required for
5
+ # Detection as well as Correction (Like results after correction)
6
+
7
def detectionProcess(article, summary, pipeline, arbiter=False):
    """Run the detection pipeline on one article/summary pair.

    Shared by the detection endpoint and by the correction endpoint, which
    re-runs detection on the corrected summary.

    Args:
        article: Article text passed to the pipeline and embedded in the prompt.
        summary: Summary text; it is split into sentences for tagging.
        pipeline: Object providing ``process(...)`` and ``addTags(...)``.
        arbiter: Forwarded to ``pipeline.process``. Defaults to ``False`` so
            callers that do not pass it (the correction endpoint) keep working.

    Returns:
        A dict with the tagged ``"summary"``, the factual ``"score"`` as a
        string, ``"counts"`` (how many sentence labels equal 0 and how many
        equal 2), and a ready-to-copy correction ``"copy_prompt"``.
    """
    result = pipeline.process(
        [[article, summary]], correct_the_summary=False, arbiter=arbiter
    )
    all_sentences = sent_tokenize(summary)
    print(result)
    # From here on `summary` is the tagged version returned by the pipeline.
    summary = pipeline.addTags(
        all_sentences, result["sent_predicted"][0], len(all_sentences)
    )
    score = str(result["factual_score"][0])
    sentenceLabels = list(result["sent_predicted"][0])
    # NOTE(review): label meanings are not visible here; presumably 0 marks a
    # factual sentence and 2 a hallucinated one -- confirm against the pipeline.
    labelCounts = [sentenceLabels.count(0), sentenceLabels.count(2)]

    # NOTE(review): the leading whitespace of the prompt lines could not be
    # recovered from the diff view -- confirm it against the original file.
    prompt = f"""
    Here is a summary with hallucinated parts marked using <xx> tags.

    Please correct only the text inside the <xx> tags to make it factually accurate based on the original article. Leave the rest of the summary unchanged and remove the <xx> tags after correction.

    Return the summary with hallucinated parts fixed and you can remove those <xx></xx> tags. Don't remove that entire sentence.

    Original Article:
    {article}

    Summary:
    {summary}

    """
    return {"summary": summary, "score": score, "counts": labelCounts, "copy_prompt": prompt}