Rafael Camargo committed on
Commit
d62ddc8
·
1 Parent(s): 698cdee

remove javascript code and start a python project

Browse files
.gitignore CHANGED
@@ -1,6 +1,5 @@
1
-
2
- # Created by https://www.toptal.com/developers/gitignore/api/windows,node,macos,linux,sublimetext
3
- # Edit at https://www.toptal.com/developers/gitignore?templates=windows,node,macos,linux,sublimetext
4
 
5
  ### Linux ###
6
  *~
@@ -46,188 +45,180 @@ Network Trash Folder
46
  Temporary Items
47
  .apdisk
48
 
49
- ### Node ###
50
- # Logs
51
- logs
52
- *.log
53
- npm-debug.log*
54
- yarn-debug.log*
55
- yarn-error.log*
56
- lerna-debug.log*
57
-
58
- # Diagnostic reports (https://nodejs.org/api/report.html)
59
- report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
60
-
61
- # Runtime data
62
- pids
63
- *.pid
64
- *.seed
65
- *.pid.lock
66
-
67
- # Directory for instrumented libs generated by jscoverage/JSCover
68
- lib-cov
69
-
70
- # Coverage directory used by tools like istanbul
71
- coverage
72
- *.lcov
73
-
74
- # nyc test coverage
75
- .nyc_output
76
-
77
- # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
78
- .grunt
79
-
80
- # Bower dependency directory (https://bower.io/)
81
- bower_components
82
-
83
- # node-waf configuration
84
- .lock-wscript
85
 
86
- # Compiled binary addons (https://nodejs.org/api/addons.html)
87
- build/Release
 
 
 
88
 
89
- # Dependency directories
90
- node_modules/
91
- jspm_packages/
92
 
93
- # TypeScript v1 declaration files
94
- typings/
95
-
96
- # TypeScript cache
97
- *.tsbuildinfo
98
-
99
- # Optional npm cache directory
100
- .npm
101
-
102
- # Optional eslint cache
103
- .eslintcache
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
- # Optional stylelint cache
106
- .stylelintcache
 
107
 
108
- # Microbundle cache
109
- .rpt2_cache/
110
- .rts2_cache_cjs/
111
- .rts2_cache_es/
112
- .rts2_cache_umd/
113
 
114
- # Optional REPL history
115
- .node_repl_history
116
 
117
- # Output of 'npm pack'
118
- *.tgz
 
 
119
 
120
- # Yarn Integrity file
121
- .yarn-integrity
122
 
123
- # dotenv environment variables file
124
- .env
125
- .env.test
126
- .env*.local
127
 
128
- # parcel-bundler cache (https://parceljs.org/)
129
- .cache
130
- .parcel-cache
131
 
132
- # Next.js build output
133
- .next
 
 
 
 
134
 
135
- # Nuxt.js build / generate output
136
- .nuxt
137
- dist
138
 
139
- # Storybook build outputs
140
- .out
141
- .storybook-out
142
- storybook-static
143
 
144
- # rollup.js default build output
145
- dist/
146
 
147
- # Gatsby files
148
- .cache/
149
- # Comment in the public line in if your project uses Gatsby and not Next.js
150
- # https://nextjs.org/blog/next-9-1#public-directory-support
151
- # public
152
-
153
- # vuepress build output
154
- .vuepress/dist
155
-
156
- # Serverless directories
157
- .serverless/
158
-
159
- # FuseBox cache
160
- .fusebox/
161
-
162
- # DynamoDB Local files
163
- .dynamodb/
164
-
165
- # TernJS port file
166
- .tern-port
167
-
168
- # Stores VSCode versions used for testing VSCode extensions
169
- .vscode-test
170
-
171
- # Temporary folders
172
- tmp/
173
- temp/
174
-
175
- ### SublimeText ###
176
- # Cache files for Sublime Text
177
- *.tmlanguage.cache
178
- *.tmPreferences.cache
179
- *.stTheme.cache
180
-
181
- # Workspace files are user-specific
182
- *.sublime-workspace
183
-
184
- # Project files should be checked into the repository, unless a significant
185
- # proportion of contributors will probably not be using Sublime Text
186
- # *.sublime-project
187
-
188
- # SFTP configuration file
189
- sftp-config.json
190
-
191
- # Package control specific files
192
- Package Control.last-run
193
- Package Control.ca-list
194
- Package Control.ca-bundle
195
- Package Control.system-ca-bundle
196
- Package Control.cache/
197
- Package Control.ca-certs/
198
- Package Control.merged-ca-bundle
199
- Package Control.user-ca-bundle
200
- oscrypto-ca-bundle.crt
201
- bh_unicode_properties.cache
202
-
203
- # Sublime-github package stores a github token in this file
204
- # https://packagecontrol.io/packages/sublime-github
205
- GitHub.sublime-settings
206
-
207
- ### Windows ###
208
- # Windows thumbnail cache files
209
- Thumbs.db
210
- Thumbs.db:encryptable
211
- ehthumbs.db
212
- ehthumbs_vista.db
213
-
214
- # Dump file
215
- *.stackdump
216
-
217
- # Folder config file
218
- [Dd]esktop.ini
219
-
220
- # Recycle Bin used on file shares
221
- $RECYCLE.BIN/
222
-
223
- # Windows Installer files
224
- *.cab
225
- *.msi
226
- *.msix
227
- *.msm
228
- *.msp
229
-
230
- # Windows shortcuts
231
- *.lnk
232
-
233
- # End of https://www.toptal.com/developers/gitignore/api/windows,node,macos,linux,sublimetext
 
1
+ # Created by https://www.toptal.com/developers/gitignore/api/macos,python,linux
2
+ # Edit at https://www.toptal.com/developers/gitignore?templates=macos,python,linux
 
3
 
4
  ### Linux ###
5
  *~
 
45
  Temporary Items
46
  .apdisk
47
 
48
+ ### macOS Patch ###
49
+ # iCloud generated files
50
+ *.icloud
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
+ ### Python ###
53
+ # Byte-compiled / optimized / DLL files
54
+ __pycache__/
55
+ *.py[cod]
56
+ *$py.class
57
 
58
+ # C extensions
59
+ *.so
 
60
 
61
+ # Distribution / packaging
62
+ .Python
63
+ build/
64
+ develop-eggs/
65
+ dist/
66
+ downloads/
67
+ eggs/
68
+ .eggs/
69
+ lib/
70
+ lib64/
71
+ parts/
72
+ sdist/
73
+ var/
74
+ wheels/
75
+ share/python-wheels/
76
+ *.egg-info/
77
+ .installed.cfg
78
+ *.egg
79
+ MANIFEST
80
+
81
+ # PyInstaller
82
+ # Usually these files are written by a python script from a template
83
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
84
+ *.manifest
85
+ *.spec
86
+
87
+ # Installer logs
88
+ pip-log.txt
89
+ pip-delete-this-directory.txt
90
+
91
+ # Unit test / coverage reports
92
+ htmlcov/
93
+ .tox/
94
+ .nox/
95
+ .coverage
96
+ .coverage.*
97
+ .cache
98
+ nosetests.xml
99
+ coverage.xml
100
+ *.cover
101
+ *.py,cover
102
+ .hypothesis/
103
+ .pytest_cache/
104
+ cover/
105
+
106
+ # Translations
107
+ *.mo
108
+ *.pot
109
+
110
+ # Django stuff:
111
+ *.log
112
+ local_settings.py
113
+ db.sqlite3
114
+ db.sqlite3-journal
115
+
116
+ # Flask stuff:
117
+ instance/
118
+ .webassets-cache
119
+
120
+ # Scrapy stuff:
121
+ .scrapy
122
+
123
+ # Sphinx documentation
124
+ docs/_build/
125
+
126
+ # PyBuilder
127
+ .pybuilder/
128
+ target/
129
+
130
+ # Jupyter Notebook
131
+ .ipynb_checkpoints
132
+
133
+ # IPython
134
+ profile_default/
135
+ ipython_config.py
136
+
137
+ # pyenv
138
+ # For a library or package, you might want to ignore these files since the code is
139
+ # intended to run in multiple environments; otherwise, check them in:
140
+ # .python-version
141
+
142
+ # pipenv
143
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
144
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
145
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
146
+ # install all needed dependencies.
147
+ #Pipfile.lock
148
+
149
+ # poetry
150
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
151
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
152
+ # commonly ignored for libraries.
153
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
154
+ #poetry.lock
155
+
156
+ # pdm
157
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
158
+ #pdm.lock
159
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
160
+ # in version control.
161
+ # https://pdm.fming.dev/#use-with-ide
162
+ .pdm.toml
163
+
164
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
165
+ __pypackages__/
166
+
167
+ # Celery stuff
168
+ celerybeat-schedule
169
+ celerybeat.pid
170
+
171
+ # SageMath parsed files
172
+ *.sage.py
173
+
174
+ # Environments
175
+ .env
176
+ .venv
177
+ env/
178
+ venv/
179
+ ENV/
180
+ env.bak/
181
+ venv.bak/
182
 
183
+ # Spyder project settings
184
+ .spyderproject
185
+ .spyproject
186
 
187
+ # Rope project settings
188
+ .ropeproject
 
 
 
189
 
190
+ # mkdocs documentation
191
+ /site
192
 
193
+ # mypy
194
+ .mypy_cache/
195
+ .dmypy.json
196
+ dmypy.json
197
 
198
+ # Pyre type checker
199
+ .pyre/
200
 
201
+ # pytype static type analyzer
202
+ .pytype/
 
 
203
 
204
+ # Cython debug symbols
205
+ cython_debug/
 
206
 
207
+ # PyCharm
208
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
209
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
210
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
211
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
212
+ #.idea/
213
 
214
+ ### Python Patch ###
215
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
216
+ poetry.toml
217
 
218
+ # ruff
219
+ .ruff_cache/
 
 
220
 
221
+ # LSP config files
222
+ pyrightconfig.json
223
 
224
+ # End of https://www.toptal.com/developers/gitignore/api/macos,python,linux
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Pipfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [[source]]
2
+ url = "https://pypi.org/simple"
3
+ verify_ssl = true
4
+ name = "pypi"
5
+
6
+ [packages]
7
+
8
+ [dev-packages]
9
+
10
+ [requires]
11
+ python_version = "3.12"
README.md DELETED
@@ -1,32 +0,0 @@
1
- ## Rellow
2
-
3
- ## Contributing
4
-
5
- 1. Install [Node](https://nodejs.org/en/). Download the "Recommend for Most Users" version.
6
-
7
- 2. Clone the repo:
8
- ``` bash
9
- git clone git@github.com:rafaelcamargo/rellow.git
10
- ```
11
-
12
- 3. Go to the project directory
13
- ``` bash
14
- cd rellow
15
- ```
16
-
17
- 4. Install the project dependencies
18
- ``` bash
19
- npm install
20
- ```
21
-
22
- 5. Run the experiment
23
- ``` bash
24
- npm run start
25
- ```
26
-
27
- ## Tests
28
-
29
- In case you have changed any website behavior, ensure that all changes are covered with automated tests:
30
- ``` bash
31
- npm run test
32
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
package-lock.json DELETED
The diff for this file is too large to render. See raw diff
 
package.json DELETED
@@ -1,25 +0,0 @@
1
- {
2
- "name": "rellow",
3
- "version": "0.1.0",
4
- "description": "Imaginary word generator",
5
- "main": "src/index.js",
6
- "scripts": {
7
- "test": "jest",
8
- "start": "node ./src/index"
9
- },
10
- "keywords": [
11
- "ai",
12
- "generative",
13
- "supervised",
14
- "learning",
15
- "word"
16
- ],
17
- "author": "Rafael Camargo <hello@rafelcamargo.com>",
18
- "license": "UNLICENSED",
19
- "devDependencies": {
20
- "jest": "^29.7.0"
21
- },
22
- "dependencies": {
23
- "@tensorflow/tfjs-node": "^4.22.0"
24
- }
25
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/data/dictionary_compact.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4636de7bb1026b772680a075efc093871c0362fcdbbf588fa7d2f060492b44a4
3
- size 22458934
 
 
 
 
src/data/words.json DELETED
@@ -1,42 +0,0 @@
1
- [
2
- {
3
- "word": "Eloquent",
4
- "definition": "Fluent or persuasive in speaking or writing"
5
- },
6
- {
7
- "word": "Serene",
8
- "definition": "Peaceful and untroubled"
9
- },
10
- {
11
- "word": "Vivid",
12
- "definition": "Producing powerful feelings or strong"
13
- },
14
- {
15
- "word": "Ardent",
16
- "definition": "Enthusiastic or passionate"
17
- },
18
- {
19
- "word": "Fragrant",
20
- "definition": "Having a pleasant or sweet smell"
21
- },
22
- {
23
- "word": "Diligent",
24
- "definition": "Showing care and conscientiousness in one's work or duties"
25
- },
26
- {
27
- "word": "Imminent",
28
- "definition": "About to happen"
29
- },
30
- {
31
- "word": "Opaque",
32
- "definition": "Not able to be seen through"
33
- },
34
- {
35
- "word": "Subtle",
36
- "definition": "Delicate or precise as to be difficult to perceive or analyze"
37
- },
38
- {
39
- "word": "Ethereal",
40
- "definition": "Extremely delicate and light in a way that seems not of this world"
41
- }
42
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/index.js DELETED
@@ -1,72 +0,0 @@
1
- const tf = require('@tensorflow/tfjs-node');
2
- const tokenService = require('./services/token');
3
- const dictionary = require('./data/words');
4
-
5
- const dataset = dictionary.map(({ word, definition }) => {
6
- const encodedDefinition = tokenService.encode(definition.replace(/,;\./g, '').split(' '));
7
- const encodedWord = tokenService.encode([word]);
8
- const necessaryPad = encodedDefinition.length - encodedWord.length;
9
- const padding = new Array(necessaryPad).fill(0);
10
- const finalWord = encodedWord.concat(padding)
11
- return {
12
- word: finalWord,
13
- definition: encodedDefinition,
14
- }
15
- })
16
- const dictionatySize = 200 // random big number. understand how to better set it
17
- const maxInputSentenceSize = dataset.reduce((maxSize, { word, definition }) => {
18
- return definition.length > maxSize ? definition.length : maxSize;
19
- }, 0);
20
-
21
-
22
- async function run() {
23
- const model = tf.sequential();
24
- // Embedding layer for word representations
25
- model.add(tf.layers.embedding({inputDim: dictionatySize, outputDim: 64, inputLength: maxInputSentenceSize}));
26
- // LSTM layer for capturing sequence information
27
- model.add(tf.layers.lstm({units: 128, returnSequences: true}));
28
- // Dense layer to output a word for each position in the sentence
29
- // model.add(tf.layers.dense({units: maxInputSentenceSize, activation: 'softmax'}));
30
- model.add(tf.layers.dense({units: dictionatySize, activation: 'softmax'}));
31
- model.compile({loss: 'categoricalCrossentropy', optimizer: 'adam'});
32
-
33
- // Prepare input and output sequences as tensors
34
- const trainingWords = dataset.map(({ word }) => tokenService.padEncoding(word, maxInputSentenceSize))
35
- const trainingDefinitions = dataset.map(({ definition }) => tokenService.padEncoding(definition, maxInputSentenceSize))
36
-
37
- const tensorWords = tf.tensor2d(trainingWords); // shape: [numSamples, maxInputLength]
38
- // const tensorDefinitions = tf.tensor2d(trainingDefinitions); // shape: [numSamples, maxOutputLength, vocabSize]
39
-
40
- const tensorDefinitions = tf.tensor3d(
41
- trainingDefinitions.map(def => tokenService.oneHotEncode(def, dictionatySize)),
42
- [trainingDefinitions.length, maxInputSentenceSize, dictionatySize]
43
- );
44
-
45
- // Train the model on text sequences
46
- await model.fit(tensorWords, tensorDefinitions, {epochs: 100});
47
-
48
- // predict(model, 'Serene') // Understand why definition is not right even for a word already defined in the training dataset
49
- predict(model, 'Smoker');
50
- }
51
-
52
- function predict(model, newWord){
53
- let encodedWord = tokenService.encode([newWord]);
54
-
55
- // Ensure padding
56
- encodedWord = tokenService.padEncoding(encodedWord, maxInputSentenceSize);
57
-
58
- // Convert to tensor
59
- const wordTensor = tf.tensor2d([encodedWord]);
60
-
61
- // Generate prediction
62
- const prediction = model.predict(wordTensor);
63
-
64
- // Decode the predicted tokens
65
- const predictedTokens = prediction.argMax(2).arraySync()[0]; // Get token IDs
66
- console.log({ predictedTokens })
67
- const predictedDefinition = predictedTokens.map(tokenId => tokenService.decodeToken(tokenId)).join(' ');
68
-
69
- console.log(`Generated Definition for '${newWord}': ${predictedDefinition.replace(/0/g, '').trim()}`);
70
- }
71
-
72
- run()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/services/token.js DELETED
@@ -1,51 +0,0 @@
1
- const _public = {};
2
-
3
- const dictionary = [];
4
-
5
- _public.encode = words => {
6
- return words.map(word => {
7
- const code = findWordCode(word);
8
- if(code !== -1) return code + 1;
9
- dictionary.push(word);
10
- return dictionary.length;
11
- });
12
- };
13
-
14
- _public.decode = codes => {
15
- return codes.map(code => {
16
- return dictionary[code - 1];
17
- });
18
- };
19
-
20
- _public.decodeToken = code => {
21
- return dictionary[code - 1] || 0;
22
- }
23
-
24
- _public.oneHotEncode = (sequence, vocabSize) => {
25
- return sequence.map(tokenId => {
26
- const oneHot = new Array(vocabSize).fill(0);
27
- if (tokenId >= 0 && tokenId < vocabSize) {
28
- oneHot[tokenId] = 1;
29
- }
30
- return oneHot;
31
- });
32
- };
33
-
34
- _public.padEncoding = (encoding, minLength) => {
35
- const necessaryPadding = minLength - encoding.length;
36
- if(necessaryPadding > 0) {
37
- const padding = new Array(necessaryPadding).fill(0);
38
- return encoding.concat(padding);
39
- }
40
- return encoding;
41
- };
42
-
43
- _public.resetDictionary = () => {
44
- dictionary.length = 0;
45
- };
46
-
47
- function findWordCode(word){
48
- return dictionary.indexOf(word);
49
- }
50
-
51
- module.exports = _public;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/services/token.test.js DELETED
@@ -1,17 +0,0 @@
1
- const tokenService = require('./token')
2
-
3
- describe('Token Service', () => {
4
- afterEach(() => {
5
- tokenService.resetDictionary()
6
- });
7
-
8
- it('should encode/decode words', () => {
9
- expect(tokenService.encode(['glorious', 'times'])).toEqual([1, 2]);
10
- expect(tokenService.decode([2, 1])).toEqual(['times', 'glorious']);
11
- });
12
-
13
- it('should pad encoding result when lower than minimum length', () => {
14
- expect(tokenService.padEncoding([1,2], 5)).toEqual([1,2,0,0,0]);
15
- expect(tokenService.padEncoding([1,2,3,4,5,6], 6)).toEqual([1,2,3,4,5,6]);
16
- })
17
- });