Spaces: Sleeping
Updated the prompt for transcript extraction
Browse files
app.py
CHANGED
|
@@ -41,30 +41,6 @@ model = MllamaForConditionalGeneration.from_pretrained(
|
|
| 41 |
# model.to(device)
|
| 42 |
processor = AutoProcessor.from_pretrained(model_name, use_auth_token=HF_TOKEN)
|
| 43 |
|
| 44 |
-
# @spaces.GPU # Use the free GPU provided by Hugging Face Spaces
|
| 45 |
-
# def predict(image, text):
|
| 46 |
-
# # Prepare the input messages
|
| 47 |
-
# messages = [
|
| 48 |
-
# {"role": "user", "content": [
|
| 49 |
-
# {"type": "image"}, # Specify that an image is provided
|
| 50 |
-
# {"type": "text", "text": text} # Add the user-provided text input
|
| 51 |
-
# ]}
|
| 52 |
-
# ]
|
| 53 |
-
|
| 54 |
-
# # Create the input text using the processor's chat template
|
| 55 |
-
# input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
|
| 56 |
-
|
| 57 |
-
# # Process the inputs and move to the appropriate device
|
| 58 |
-
# inputs = processor(image, input_text, return_tensors="pt").to(device)
|
| 59 |
-
|
| 60 |
-
# # Generate a response from the model
|
| 61 |
-
# outputs = model.generate(**inputs, max_new_tokens=100)
|
| 62 |
-
|
| 63 |
-
# # Decode the output to return the final response
|
| 64 |
-
# response = processor.decode(outputs[0], skip_special_tokens=True)
|
| 65 |
-
# return response
|
| 66 |
-
|
| 67 |
-
|
| 68 |
def extract_image_from_pdf(pdf_url, dpi=75):
|
| 69 |
"""
|
| 70 |
Extract first page of PDF as image in memory
|
|
@@ -133,41 +109,6 @@ def predict_image(image_url, text, file_pref):
|
|
| 133 |
# Decode the output to return the final response
|
| 134 |
response = processor.decode(outputs[0], skip_special_tokens=True)
|
| 135 |
|
| 136 |
-
# # Prepare the input messages
|
| 137 |
-
# messages = [
|
| 138 |
-
# {"role": "user", "content": [
|
| 139 |
-
# {"type": "image"}, # Specify that an image is provided
|
| 140 |
-
# {"type": "text", "text": text} # Add the user-provided text input
|
| 141 |
-
# ]}
|
| 142 |
-
# ]
|
| 143 |
-
|
| 144 |
-
# # Create the input text using the processor's chat template
|
| 145 |
-
# input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
|
| 146 |
-
|
| 147 |
-
# # Process the inputs and move to the appropriate device
|
| 148 |
-
# inputs = processor(image=image, text=input_text, return_tensors="pt").to("cuda")
|
| 149 |
-
|
| 150 |
-
# Generate a response from the model
|
| 151 |
-
# outputs = model.generate(**inputs, max_new_tokens=100)
|
| 152 |
-
|
| 153 |
-
# # Decode the output to return the final response
|
| 154 |
-
# response = processor.decode(outputs[0], skip_special_tokens=True)
|
| 155 |
-
|
| 156 |
-
# streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
|
| 157 |
-
|
| 158 |
-
# generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
|
| 159 |
-
# generated_text = ""
|
| 160 |
-
|
| 161 |
-
# thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
| 162 |
-
# thread.start()
|
| 163 |
-
# buffer = ""
|
| 164 |
-
|
| 165 |
-
# for new_text in streamer:
|
| 166 |
-
# buffer += new_text
|
| 167 |
-
# # generated_text_without_prompt = buffer
|
| 168 |
-
# # # time.sleep(0.01)
|
| 169 |
-
# # yield buffer
|
| 170 |
-
|
| 171 |
# return buffer
|
| 172 |
return response
|
| 173 |
|
|
@@ -258,20 +199,42 @@ PROMPT_SKILLS = (
|
|
| 258 |
|
| 259 |
|
| 260 |
PROMPT_IMAGE = (
|
| 261 |
-
"
|
| 262 |
-
"'
|
| 263 |
-
"
|
| 264 |
-
"
|
| 265 |
-
"
|
| 266 |
-
"
|
| 267 |
-
"
|
| 268 |
-
"'
|
| 269 |
-
"
|
| 270 |
-
"
|
| 271 |
-
"
|
| 272 |
-
"
|
| 273 |
-
"
|
| 274 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
)
|
| 276 |
|
| 277 |
|
|
@@ -299,8 +262,11 @@ def extract_info():
|
|
| 299 |
response = ''
|
| 300 |
|
| 301 |
if data["skills"] == True:
|
| 302 |
-
|
| 303 |
-
|
|
|
|
|
|
|
|
|
|
| 304 |
else:
|
| 305 |
response_skills = ''
|
| 306 |
|
|
|
|
| 41 |
# model.to(device)
|
| 42 |
processor = AutoProcessor.from_pretrained(model_name, use_auth_token=HF_TOKEN)
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
def extract_image_from_pdf(pdf_url, dpi=75):
|
| 45 |
"""
|
| 46 |
Extract first page of PDF as image in memory
|
|
|
|
| 109 |
# Decode the output to return the final response
|
| 110 |
response = processor.decode(outputs[0], skip_special_tokens=True)
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
# return buffer
|
| 113 |
return response
|
| 114 |
|
|
|
|
| 199 |
|
| 200 |
|
| 201 |
PROMPT_IMAGE = (
|
| 202 |
+
"You are a highly intelligent assistant designed to analyze images and extract structured information from them. "
|
| 203 |
+
"Your task is to analyze the given image of a student's academic record and generate a response in the exact JSON format provided below. "
|
| 204 |
+
"If any specific information is missing or unavailable in the image, replace the corresponding field with null. "
|
| 205 |
+
"Ensure the format is consistent, strictly adhering to the structure shown below.\n\n"
|
| 206 |
+
"Required JSON Format:\n\n"
|
| 207 |
+
"{\n"
|
| 208 |
+
' "student": {\n'
|
| 209 |
+
' "name": "string",\n'
|
| 210 |
+
' "id": "string",\n'
|
| 211 |
+
' "dob": "string",\n'
|
| 212 |
+
' "original_start_date": "string",\n'
|
| 213 |
+
' "cumulative_gpa": "string",\n'
|
| 214 |
+
' "program": "string",\n'
|
| 215 |
+
' "status": "string"\n'
|
| 216 |
+
' },\n'
|
| 217 |
+
' "courses": [\n'
|
| 218 |
+
' {\n'
|
| 219 |
+
' "transfer_institution": "string",\n'
|
| 220 |
+
' "course_code": "string",\n'
|
| 221 |
+
' "course_name": "string",\n'
|
| 222 |
+
' "credits_attempted": number,\n'
|
| 223 |
+
' "credits_earned": number,\n'
|
| 224 |
+
' "grade": "string",\n'
|
| 225 |
+
' "quality_points": number,\n'
|
| 226 |
+
' "semester_code": "string",\n'
|
| 227 |
+
' "semester_dates": "string"\n'
|
| 228 |
+
' }\n'
|
| 229 |
+
" // Additional courses can be added here\n"
|
| 230 |
+
" ]\n"
|
| 231 |
+
"}\n\n"
|
| 232 |
+
"Instructions:\n\n"
|
| 233 |
+
"1. Extract the student information and course details as displayed in the image.\n"
|
| 234 |
+
"2. Use null for any missing or unavailable information.\n"
|
| 235 |
+
"3. Format the extracted data exactly as shown above. Do not deviate from this structure.\n"
|
| 236 |
+
"4. Use accurate field names and ensure proper nesting of data (e.g., 'student' and 'courses' sections).\n"
|
| 237 |
+
"5. The values for numeric fields like credits_attempted, credits_earned, and quality_points should be numbers (not strings).\n"
|
| 238 |
)
|
| 239 |
|
| 240 |
|
|
|
|
| 262 |
response = ''
|
| 263 |
|
| 264 |
if data["skills"] == True:
|
| 265 |
+
if response:
|
| 266 |
+
prompt_skills = f"{PROMPT_SKILLS} using this information only -- {response}"
|
| 267 |
+
response_skills = predict_text(prompt_skills)
|
| 268 |
+
else:
|
| 269 |
+
response_skills = ''
|
| 270 |
else:
|
| 271 |
response_skills = ''
|
| 272 |
|