rafacamargo committed on
Commit
751ad61
·
1 Parent(s): 279b0d3

chore: add necessary files for huggingface to expose an inference endpoint to the llm

Browse files
Files changed (3) hide show
  1. handler.py +20 -0
  2. requirements.txt +3 -0
  3. src/prediction.py +8 -4
handler.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import sys
3
+ import os
4
+
5
+ sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
6
+ from prediction import main
7
+
8
class EndpointHandler:
    """Custom handler for a Hugging Face Inference Endpoint.

    Loads the serialized model once at startup, then answers request
    payloads of the form ``{"words": [w1, w2, w3]}`` with
    ``{"generated": <prediction>}``.
    """

    def __init__(self, model_dir, **kwargs):
        # Load the serialized model (.pt file) shipped inside the repo.
        # NOTE(review): torch.load unpickles arbitrary code — acceptable for
        # a model file we ship ourselves, but never point it at untrusted data.
        model_path = f"{model_dir}/src/model/rellow-2.pt"
        self.model = torch.load(model_path, map_location="cpu")
        self.model.eval()
        # NOTE(review): self.model is not used by __call__ below — main()
        # loads its own copy via load_model(). Confirm whether this double
        # load is intentional; it doubles startup memory.

    def __call__(self, data: dict) -> dict:
        """Handle one inference request.

        Args:
            data: Request payload; expects key "words" holding exactly
                three items.

        Returns:
            ``{"generated": <output>}`` on success, or
            ``{"error": <message>}`` when validation fails.
        """
        inputs = data.get("words", [])
        # len(...) != 3 already rejects an empty/missing list, so the
        # original's extra `not inputs` truthiness check was redundant.
        if len(inputs) != 3:
            return {"error": "Expected exactly three words"}
        output = main(words=inputs)
        return {"generated": output}
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ tiktoken==0.7.0
2
+ torch==2.7.1
3
+ numpy==2.3.0
src/prediction.py CHANGED
@@ -36,14 +36,18 @@ def generate_word(words, model, vocab, inv_vocab, max_length=64):
36
 
37
  return output_text
38
 
39
def main():
    """Load the model and print a generated word for a fixed example triple.

    Pre-change version shown on the removed side of this diff: it takes no
    arguments and hard-codes the example words (superseded by the
    parameterized version on the added side).
    """
    # Load model and vocabulary
    model, vocab, inv_vocab = load_model()

    # Example usage
    words = ["muito", "grande", "imenso"]
    result = generate_word(words, model, vocab, inv_vocab)
    print(f"Input words: {', '.join(words)}")
    print(f"Generated: {result}")

# NOTE(review): unconditional call runs inference on import — the added side
# of this diff correctly moves this under an `if __name__ == "__main__"` guard.
main()
 
 
36
 
37
  return output_text
38
 
39
def main(words=None):
    """Generate a word from three input words using the trained model.

    Args:
        words: Optional list of input words. When omitted, a built-in
            example triple is used instead.

    Returns:
        The text produced by the model for the given words.
    """
    # The model and both vocabulary mappings come from the project loader.
    model, vocab, inv_vocab = load_model()

    # Substitute the default example triple when the caller gave nothing.
    words = ["muito", "grande", "imenso"] if words is None else words

    result = generate_word(words, model, vocab, inv_vocab)
    joined = ', '.join(words)
    print(f"Input words: {joined}")
    print(f"Generated: {result}")
    return result


# Script entry point: only run inference when executed directly, not on import.
if __name__ == "__main__":
    main()