rafacamargo committed on
Commit
751ad61
·
1 Parent(s): 279b0d3

chore: add necessary files for huggingface to expose an inference endpoint to the llm

Browse files
Files changed (3) hide show
  1. handler.py +20 -0
  2. requirements.txt +3 -0
  3. src/prediction.py +8 -4
handler.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import sys
3
+ import os
4
+
5
+ sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
6
+ from prediction import main
7
+
8
class EndpointHandler:
    """Custom handler for a Hugging Face Inference Endpoint.

    Loads the serialized model once at startup, then answers request
    payloads of the form ``{"words": [w1, w2, w3]}`` with
    ``{"generated": <prediction>}``.
    """

    def __init__(self, model_dir, **kwargs):
        # Load the serialized model (.pt file) shipped inside the repo.
        # NOTE(review): torch.load unpickles arbitrary code — acceptable for
        # a model file we ship ourselves, but never point it at untrusted data.
        model_path = f"{model_dir}/src/model/rellow-2.pt"
        self.model = torch.load(model_path, map_location="cpu")
        self.model.eval()
        # NOTE(review): self.model is not used by __call__ below — main()
        # loads its own copy via load_model(). Confirm whether this double
        # load is intentional; it doubles startup memory.

    def __call__(self, data: dict) -> dict:
        """Handle one inference request.

        Args:
            data: Request payload; expects key "words" holding exactly
                three items.

        Returns:
            ``{"generated": <output>}`` on success, or
            ``{"error": <message>}`` when validation fails.
        """
        inputs = data.get("words", [])
        # len(...) != 3 already rejects an empty/missing list, so the
        # original's extra `not inputs` truthiness check was redundant.
        if len(inputs) != 3:
            return {"error": "Expected exactly three words"}
        output = main(words=inputs)
        return {"generated": output}
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ tiktoken==0.7.0
2
+ torch==2.7.1
3
+ numpy==2.3.0
src/prediction.py CHANGED
@@ -36,14 +36,18 @@ def generate_word(words, model, vocab, inv_vocab, max_length=64):
36
 
37
  return output_text
38
 
39
def main():
    """Load the model and print a generated word for a fixed example triple.

    Pre-change version shown on the removed side of this diff: it takes no
    arguments and hard-codes the example words (superseded by the
    parameterized version on the added side).
    """
    # Load model and vocabulary
    model, vocab, inv_vocab = load_model()

    # Example usage
    words = ["muito", "grande", "imenso"]
    result = generate_word(words, model, vocab, inv_vocab)
    print(f"Input words: {', '.join(words)}")
    print(f"Generated: {result}")

# NOTE(review): unconditional call runs inference on import — the added side
# of this diff correctly moves this under an `if __name__ == "__main__"` guard.
main()
 
 
36
 
37
  return output_text
38
 
39
def main(words=None):
    """Generate a word from three input words using the trained model.

    Args:
        words: Optional list of input words. When omitted, a built-in
            example triple is used instead.

    Returns:
        The text produced by the model for the given words.
    """
    # The model and both vocabulary mappings come from the project loader.
    model, vocab, inv_vocab = load_model()

    # Substitute the default example triple when the caller gave nothing.
    words = ["muito", "grande", "imenso"] if words is None else words

    result = generate_word(words, model, vocab, inv_vocab)
    joined = ', '.join(words)
    print(f"Input words: {joined}")
    print(f"Generated: {result}")
    return result


# Script entry point: only run inference when executed directly, not on import.
if __name__ == "__main__":
    main()