Spaces:

navid72m
/

pdfexplorer

Runtime error

navid72m committed on May 23, 2024

Commit

6f9ac18

verified ·

1 Parent(s): 0456c8a

Update selection.py

Files changed (1) hide show

selection.py CHANGED Viewed

@@ -43,15 +43,28 @@ instruction = most_relevant_context[:300] + " " + Question
 gemma_lm = keras_nlp.models.GemmaCausalLM.from_preset("gemma_2b_en")
 gemma_lm.save('saved_model/gemma_2b_en')
-# Convert the saved model to TensorFlow Lite format with quantization
 saved_model_dir = 'saved_model/gemma_2b_en'
 converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
 converter.optimizations = [tf.lite.Optimize.DEFAULT]
-tflite_model = converter.convert()
 # Save the quantized model
 with open('gemma_2b_en_quantized.tflite', 'wb') as f:
-    f.write(tflite_model)
 # Load the quantized model and run inference
 interpreter = tf.lite.Interpreter(model_path='gemma_2b_en_quantized.tflite')
@@ -61,8 +74,9 @@ input_details = interpreter.get_input_details()
 output_details = interpreter.get_output_details()
 def preprocess_input(instruction):
-    # Convert the input to the required format and shape
-    input_data = np.array([instruction], dtype=np.float32)
     return input_data
 input_data = preprocess_input(instruction)

 gemma_lm = keras_nlp.models.GemmaCausalLM.from_preset("gemma_2b_en")
 gemma_lm.save('saved_model/gemma_2b_en')
+# Convert the saved model to TensorFlow Lite format with 8-bit full integer quantization
 saved_model_dir = 'saved_model/gemma_2b_en'
 converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
 converter.optimizations = [tf.lite.Optimize.DEFAULT]
+# Representative dataset function for quantization
+def representative_dataset_gen():
+    for _ in range(100):
+        # Example input array, replace with your actual data
+        data = np.random.rand(1, 300).astype(np.float32)
+        yield [data]
+converter.representative_dataset = representative_dataset_gen
+converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+converter.inference_input_type = tf.uint8
+converter.inference_output_type = tf.uint8
+tflite_model_quant = converter.convert()
 # Save the quantized model
 with open('gemma_2b_en_quantized.tflite', 'wb') as f:
+    f.write(tflite_model_quant)
 # Load the quantized model and run inference
 interpreter = tf.lite.Interpreter(model_path='gemma_2b_en_quantized.tflite')
 output_details = interpreter.get_output_details()
 def preprocess_input(instruction):
+    # Tokenization and padding to match input shape
+    # This is a placeholder; replace it with your actual preprocessing code
+    input_data = np.array([[ord(c) for c in instruction]], dtype=np.uint8)
     return input_data
 input_data = preprocess_input(instruction)