Spaces:

sagaxlearn
/

testing-groq

Sleeping

App Files Files Community

khurrameycon committed on Nov 29, 2024

Commit

09fd8b1

verified ·

1 Parent(s): d228bc3

Updated the prompt for transcript extraction

Browse files

Files changed (1) hide show

app.py +41 -75

app.py CHANGED Viewed

@@ -41,30 +41,6 @@ model = MllamaForConditionalGeneration.from_pretrained(
 # model.to(device)
 processor = AutoProcessor.from_pretrained(model_name, use_auth_token=HF_TOKEN)
-# @spaces.GPU  # Use the free GPU provided by Hugging Face Spaces
-# def predict(image, text):
-#     # Prepare the input messages
-#     messages = [
-#         {"role": "user", "content": [
-#             {"type": "image"},  # Specify that an image is provided
-#             {"type": "text", "text": text}  # Add the user-provided text input
-#         ]}
-#     ]
-#     # Create the input text using the processor's chat template
-#     input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
-#     # Process the inputs and move to the appropriate device
-#     inputs = processor(image, input_text, return_tensors="pt").to(device)
-#     # Generate a response from the model
-#     outputs = model.generate(**inputs, max_new_tokens=100)
-#     # Decode the output to return the final response
-#     response = processor.decode(outputs[0], skip_special_tokens=True)
-#     return response
 def extract_image_from_pdf(pdf_url, dpi=75):
     """
     Extract first page of PDF as image in memory
@@ -133,41 +109,6 @@ def predict_image(image_url, text, file_pref):
         # Decode the output to return the final response
         response = processor.decode(outputs[0], skip_special_tokens=True)
-        # # Prepare the input messages
-        # messages = [
-        #     {"role": "user", "content": [
-        #         {"type": "image"},  # Specify that an image is provided
-        #         {"type": "text", "text": text}  # Add the user-provided text input
-        #     ]}
-        # ]
-        # # Create the input text using the processor's chat template
-        # input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
-        # # Process the inputs and move to the appropriate device
-        # inputs = processor(image=image, text=input_text, return_tensors="pt").to("cuda")
-        # Generate a response from the model
-        # outputs = model.generate(**inputs, max_new_tokens=100)
-        # # Decode the output to return the final response
-        # response = processor.decode(outputs[0], skip_special_tokens=True)
-        # streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
-        # generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
-        # generated_text = ""
-        # thread = Thread(target=model.generate, kwargs=generation_kwargs)
-        # thread.start()
-        # buffer = ""
-        # for new_text in streamer:
-        #     buffer += new_text
-        #     # generated_text_without_prompt = buffer
-        #     # # time.sleep(0.01)
-        #     # yield buffer
         # return buffer
         return response
@@ -258,20 +199,42 @@ PROMPT_SKILLS = (
 PROMPT_IMAGE = (
-    "Extract the following information from this image:\n"
-    "'Student Name'\n"
-    "'Transfer Institution'\n"
-    "'Course Code'\n"
-    "'Course Name'\n"
-    "'Credits Attempted'\n"
-    "'Credits Earned'\n"
-    "'Grade'\n"
-    "'Quality Points'\n"
-    "'Semester Code'\n"
-    "'Semester Dates'\n"
-    "'Program or Major'\n"
-    "'Cumulative GPA'\n"
-    "Only provide the requested information without adding any extra details."
 )
@@ -299,8 +262,11 @@ def extract_info():
             response = ''
         if data["skills"] == True:
-            prompt_skills = f"{PROMPT_SKILLS} using this information only -- {response}"
-            response_skills = predict_text(prompt_skills)
         else:
             response_skills = ''

 # model.to(device)
 processor = AutoProcessor.from_pretrained(model_name, use_auth_token=HF_TOKEN)
 def extract_image_from_pdf(pdf_url, dpi=75):
     """
     Extract first page of PDF as image in memory
         # Decode the output to return the final response
         response = processor.decode(outputs[0], skip_special_tokens=True)
         # return buffer
         return response
 PROMPT_IMAGE = (
+    "You are a highly intelligent assistant designed to analyze images and extract structured information from them. "
+    "Your task is to analyze the given image of a student's academic record and generate a response in the exact JSON format provided below. "
+    "If any specific information is missing or unavailable in the image, replace the corresponding field with null. "
+    "Ensure the format is consistent, strictly adhering to the structure shown below.\n\n"
+    "Required JSON Format:\n\n"
+    "{\n"
+    '  "student": {\n'
+    '    "name": "string",\n'
+    '    "id": "string",\n'
+    '    "dob": "string",\n'
+    '    "original_start_date": "string",\n'
+    '    "cumulative_gpa": "string",\n'
+    '    "program": "string",\n'
+    '    "status": "string"\n'
+    '  },\n'
+    '  "courses": [\n'
+    '    {\n'
+    '      "transfer_institution": "string",\n'
+    '      "course_code": "string",\n'
+    '      "course_name": "string",\n'
+    '      "credits_attempted": number,\n'
+    '      "credits_earned": number,\n'
+    '      "grade": "string",\n'
+    '      "quality_points": number,\n'
+    '      "semester_code": "string",\n'
+    '      "semester_dates": "string"\n'
+    '    }\n'
+    "    // Additional courses can be added here\n"
+    "  ]\n"
+    "}\n\n"
+    "Instructions:\n\n"
+    "1. Extract the student information and course details as displayed in the image.\n"
+    "2. Use null for any missing or unavailable information.\n"
+    "3. Format the extracted data exactly as shown above. Do not deviate from this structure.\n"
+    "4. Use accurate field names and ensure proper nesting of data (e.g., 'student' and 'courses' sections).\n"
+    "5. The values for numeric fields like credits_attempted, credits_earned, and quality_points should be numbers (not strings).\n"
 )
             response = ''
         if data["skills"] == True:
+            if response:
+                prompt_skills = f"{PROMPT_SKILLS} using this information only -- {response}"
+                response_skills = predict_text(prompt_skills)
+            else:
+                response_skills = ''
         else:
             response_skills = ''