Josebert committed on
Commit
2c2d398
·
verified ·
1 Parent(s): 178bc1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -61
app.py CHANGED
@@ -1,78 +1,128 @@
1
- from vllm import LLM
2
- from vllm.sampling_params import SamplingParams
3
- from datetime import datetime, timedelta
4
- from huggingface_hub import hf_hub_download
5
  import requests
6
  import json
 
 
 
7
  from pdf2image import convert_from_path
8
  import easyocr
9
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  # Initialize OCR reader
11
  reader = easyocr.Reader(['en'])
12
 
13
- # ... existing SYSTEM_PROMPT and load_system_prompt definitions ...
 
14
 
15
- def process_document(file_path):
16
- # Handle PDF files
17
- if file_path.lower().endswith('.pdf'):
18
- images = convert_from_path(file_path)
19
- extracted_text = ""
20
- for image in images:
21
- ocr_results = reader.readtext(image, detail=0)
22
- extracted_text += " ".join(ocr_results) + "\n"
23
- # Handle image files
24
- else:
25
- ocr_results = reader.readtext(file_path, detail=0)
26
- extracted_text = " ".join(ocr_results)
 
 
 
 
 
27
 
28
- return extracted_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  def generate_response(file_path):
31
- # Extract text from document
32
- extracted_text = process_document(file_path)
33
-
34
- # Prepare messages for the LLM
35
- messages = [
36
- {"role": "system", "content": SYSTEM_PROMPT},
37
- {
38
- "role": "user",
39
- "content": [
40
- {
41
- "type": "text",
42
- "text": f"Process this extracted text, correct any errors and enhance it:\n{extracted_text}",
43
- }
44
- ],
45
- },
46
- ]
47
-
48
- # Initialize the LLM
49
- llm = LLM(model="mistralai/Mistral-Small-3.1-24B-Instruct-2503", tokenizer_mode="mistral")
50
-
51
- # Define sampling parameters
52
- sampling_params = SamplingParams(max_tokens=512, temperature=0.15)
53
-
54
- # Get the response from the LLM
55
- outputs = llm.chat(messages, sampling_params=sampling_params)
56
-
57
- return outputs[0].outputs[0].text
58
 
59
- # Example usage
60
- if __name__ == "__main__":
61
- document_path = "path/to/your/document.pdf" # or .jpg/.png
62
- response = generate_response(document_path)
63
- print(response)
 
 
 
 
 
 
 
 
 
 
 
64
 
65
- # Gradio interface
66
- with gr.Blocks() as demo:
67
  gr.Markdown("# Document Processing with Mistral")
68
- file_input = gr.File(label="Upload PDF or Image")
69
- output_text = gr.Textbox(label="Processed Text", lines=10)
70
- submit_btn = gr.Button("Process Document")
71
 
72
- submit_btn.click(
73
- fn=generate_response,
74
- inputs=file_input,
75
- outputs=output_text
76
- )
 
 
 
 
 
 
77
 
78
- demo.launch()
 
 
1
+ import os
2
+ import gradio as gr
 
 
3
  import requests
4
  import json
5
+ import logging
6
+ from datetime import datetime
7
+ import random
8
  from pdf2image import convert_from_path
9
  import easyocr
10
 
11
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# API configuration: the Hugging Face Inference API token must be supplied
# via the API_TOKEN environment variable (e.g. a Space secret); fail fast
# at import time if it is missing.
api_token = os.getenv("API_TOKEN")
if not api_token:
    raise ValueError("API token not found. Make sure 'API_TOKEN' is set in the Secrets.")

API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
HEADERS = {"Authorization": f"Bearer {api_token}"}

# Initialize OCR reader (English only; loads EasyOCR models once at startup)
reader = easyocr.Reader(['en'])

# Define a system prompt (example)
# NOTE(review): SYSTEM_PROMPT is defined but never sent to the API —
# make_api_call only posts the user prompt; confirm whether it should
# be prepended to the request.
SYSTEM_PROMPT = "You are a helpful assistant that corrects and enhances text."
28
 
29
def get_unique_parameters():
    """Build per-request sampling settings plus a wall-clock tag.

    Returns:
        dict with a random "temperature" in [0.7, 0.9], a random
        "top_p" in [0.85, 0.95], and "timestamp" as the current
        time formatted HHMMSS.
    """
    temperature = random.uniform(0.7, 0.9)
    top_p = random.uniform(0.85, 0.95)
    stamp = datetime.now().strftime("%H%M%S")
    return {"temperature": temperature, "top_p": top_p, "timestamp": stamp}
36
+
37
def make_api_call(prompt, params):
    """POST a text-generation request to the HF Inference API.

    Args:
        prompt: The user prompt to send.
        params: Dict with "temperature", "top_p" and "timestamp" keys
            (see get_unique_parameters); the timestamp is appended to
            the input so repeated prompts stay distinct.

    Returns:
        The decoded JSON response on success, or None on any request
        or decoding failure (the error is logged).
    """
    payload = {
        "inputs": f"{prompt} [ts:{params['timestamp']}]",
        "parameters": {
            "temperature": params["temperature"],
            "top_p": params["top_p"]
        }
    }

    try:
        # A timeout is essential: without one a stalled request blocks
        # the Gradio worker indefinitely.
        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection/HTTP errors; ValueError
        # covers a non-JSON body from response.json().
        logger.error(f"API Error: {e}")
        return None
54
+
55
def process_document(file_path):
    """Run OCR over a PDF or image file and return the extracted text.

    Args:
        file_path: Path to a .pdf file (each page is rasterized and
            OCR'd) or to an image file readable by EasyOCR.

    Returns:
        The extracted text; "" on any error (the error is logged).
    """
    import numpy as np  # local import: only needed to feed PIL pages to EasyOCR

    try:
        # Handle PDF files
        if file_path.lower().endswith('.pdf'):
            images = convert_from_path(file_path)
            extracted_text = ""
            for image in images:
                # pdf2image yields PIL Images, but EasyOCR's readtext
                # accepts a path, bytes, or numpy array — convert first.
                ocr_results = reader.readtext(np.asarray(image), detail=0)
                extracted_text += " ".join(ocr_results) + "\n"
        # Handle image files (EasyOCR accepts the path directly)
        else:
            ocr_results = reader.readtext(file_path, detail=0)
            extracted_text = " ".join(ocr_results)

        return extracted_text
    except Exception as e:
        # Best-effort: callers treat "" as "nothing extracted".
        logger.error(f"Error processing document: {e}")
        return ""
73
 
74
def generate_response(file_path):
    """Full pipeline: OCR the uploaded document, then enhance the text via the LLM API.

    Args:
        file_path: Path to the uploaded PDF/image. Gradio's gr.File may
            pass a tempfile-like object instead of a str; its .name
            attribute is used in that case.

    Returns:
        The model's generated text, or a human-readable error message.
    """
    # Nothing uploaded yet — avoid AttributeError on None.lower() downstream.
    if file_path is None:
        return "Please upload a document."
    # Normalize a file-like object from gr.File to its filesystem path.
    file_path = getattr(file_path, "name", file_path)

    try:
        # Extract text from document
        extracted_text = process_document(file_path)

        if not extracted_text.strip():
            return "No text extracted from the document."

        params = get_unique_parameters()
        prompt = f"Process this extracted text, correct any errors and enhance it:\n{extracted_text}"

        result = make_api_call(prompt, params)
        if result:
            # HF Inference API returns a list of {"generated_text": ...} dicts.
            return result[0].get("generated_text", "No response from the model.")
        else:
            return "Error processing the document."
    except Exception as e:
        logger.error(f"Error generating response: {e}")
        return "Error processing the document."
 
 
 
 
 
 
 
 
 
93
 
94
# Interface styling: injected into gr.Blocks(css=...) below. Centers the
# app at max 1200px and restyles buttons and inputs.
css = """
.gradio-container {
    font-family: 'Arial', sans-serif !important;
    max-width: 1200px !important;
    margin: auto !important;
}
.gr-button {
    background-color: #2e5090 !important;
    color: white !important;
}
.gr-input {
    border: 2px solid #ddd !important;
    border-radius: 8px !important;
}
"""
110
 
111
# Create interface: a single tab with a file upload, a read-only output
# box, and a button that runs the OCR + LLM pipeline.
with gr.Blocks(css=css, theme=gr.themes.Default()) as demo:
    gr.Markdown("# Document Processing with Mistral")

    with gr.Tabs():
        with gr.Tab("Document Processing"):
            file_input = gr.File(label="Upload PDF or Image")
            output_text = gr.Textbox(label="Processed Text", lines=10)
            submit_btn = gr.Button("Process Document")

            # On click, pass the uploaded file to generate_response and
            # show its result (text or error message) in the textbox.
            submit_btn.click(
                fn=generate_response,
                inputs=file_input,
                outputs=output_text
            )
126
 
127
if __name__ == "__main__":
    # NOTE(review): share=True requests a public gradio.live tunnel when
    # run locally — confirm that exposure is intended.
    demo.launch(share=True)