Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,23 +5,23 @@ import re
|
|
| 5 |
import gradio as gr
|
| 6 |
import os
|
| 7 |
import accelerate
|
| 8 |
-
import spaces
|
| 9 |
import subprocess
|
| 10 |
-
from huggingface_hub import hf_hub_download, InferenceClient
|
| 11 |
-
from llama_cpp import Llama
|
| 12 |
|
| 13 |
-
from huggingface_hub import login
|
| 14 |
-
login(token = os.getenv('HF_TOKEN'))
|
| 15 |
|
| 16 |
-
repo_id = "srijaydeshpande/Deid-Fine-Tuned"
|
| 17 |
-
model_id = "deid_finetuned.Q4_K_M.gguf"
|
| 18 |
|
| 19 |
|
| 20 |
-
hf_hub_download(
|
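| 21 |
-
repo_id="srijaydeshpande/Deid-Fine-Tuned",
|
| 22 |
-
filename="deid_finetuned.Q4_K_M.gguf",
|
| 23 |
-
local_dir = "./models"
|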
| 24 |
-
)
|
| 25 |
|
| 26 |
def process_document(pdf_path):
|
| 27 |
extracted_pages = extract_pages(pdf_path)
|
|
@@ -142,7 +142,7 @@ def deidentify_doc(llm_type, pdftext, maxtokens, temperature, top_probability):
|
|
| 142 |
return response
|
| 143 |
|
| 144 |
|
| 145 |
-
@spaces.GPU(duration=80)
|
| 146 |
def pdf_to_text(files, llm_type, maxtokens=2048, temperature=0, top_probability=0.95):
|
| 147 |
files=[files]
|
| 148 |
for file in files:
|
|
@@ -158,12 +158,12 @@ def pdf_to_text(files, llm_type, maxtokens=2048, temperature=0, top_probability=
|
|
| 158 |
for page_id in page2content:
|
| 159 |
pdftext = page2content[page_id]
|
| 160 |
original_pdf_text += pdftext + '\n'
|
| 161 |
-
response_generator = deidentify_doc(llm_type, pdftext, maxtokens, temperature, top_probability)
|
| 162 |
-
print('RESPONSE GENERATOR IS ',response_generator)
|
| 163 |
# for chunk in response_generator:
|
| 164 |
# accumulated_text += chunk
|
| 165 |
# yield accumulated_text # Keep updating output
|
| 166 |
-
return response_generator # + "\n\n"
|
| 167 |
print('Extracted Page Content Is ', original_pdf_text)
|
| 168 |
print('------------------------------------------------------------')
|
| 169 |
# return anonymized_text
|
|
|
|
| 5 |
import gradio as gr
|
| 6 |
import os
|
| 7 |
import accelerate
|
| 8 |
+
# import spaces
|
| 9 |
import subprocess
|
| 10 |
+
# from huggingface_hub import hf_hub_download, InferenceClient
|
| 11 |
+
# from llama_cpp import Llama
|
| 12 |
|
| 13 |
+
# from huggingface_hub import login
|
| 14 |
+
# login(token = os.getenv('HF_TOKEN'))
|
| 15 |
|
| 16 |
+
# repo_id = "srijaydeshpande/Deid-Fine-Tuned"
|
| 17 |
+
# model_id = "deid_finetuned.Q4_K_M.gguf"
|
| 18 |
|
| 19 |
|
| 20 |
+
# hf_hub_download(
|
| 21 |
+
# repo_id="srijaydeshpande/Deid-Fine-Tuned",
|
| 22 |
+
# filename="deid_finetuned.Q4_K_M.gguf",
|
| 23 |
+
# local_dir = "./models"
|
| 24 |
+
# )
|
| 25 |
|
| 26 |
def process_document(pdf_path):
|
| 27 |
extracted_pages = extract_pages(pdf_path)
|
|
|
|
| 142 |
return response
|
| 143 |
|
| 144 |
|
| 145 |
+
# @spaces.GPU(duration=80)
|
| 146 |
def pdf_to_text(files, llm_type, maxtokens=2048, temperature=0, top_probability=0.95):
|
| 147 |
files=[files]
|
| 148 |
for file in files:
|
|
|
|
| 158 |
for page_id in page2content:
|
| 159 |
pdftext = page2content[page_id]
|
| 160 |
original_pdf_text += pdftext + '\n'
|
| 161 |
+
# response_generator = deidentify_doc(llm_type, pdftext, maxtokens, temperature, top_probability)
|
| 162 |
+
# print('RESPONSE GENERATOR IS ',response_generator)
|
| 163 |
# for chunk in response_generator:
|
| 164 |
# accumulated_text += chunk
|
| 165 |
# yield accumulated_text # Keep updating output
|
| 166 |
+
# return response_generator # + "\n\n"
|
| 167 |
print('Extracted Page Content Is ', original_pdf_text)
|
| 168 |
print('------------------------------------------------------------')
|
| 169 |
# return anonymized_text
|