Buckets:
| namespace Faker\Provider; | |
| abstract class Text extends Base | |
| { | |
| protected static $baseText = ''; | |
| protected static $separator = ' '; | |
| protected static $separatorLen = 1; | |
| protected $explodedText; | |
| protected $consecutiveWords = []; | |
| protected static $textStartsWithUppercase = true; | |
| /** | |
| * Generate a text string by the Markov chain algorithm. | |
| * | |
| * Depending on the $maxNbChars, returns a random valid looking text. The algorithm | |
| * generates a weighted table with the specified number of words as the index and the | |
| * possible following words as the value. | |
| * | |
| * @example 'Alice, swallowing down her flamingo, and began by taking the little golden key' | |
| * | |
| * @param int $maxNbChars Maximum number of characters the text should contain (minimum: 10) | |
| * @param int $indexSize Determines how many words are considered for the generation of the next word. | |
| * The minimum is 1, and it produces a higher level of randomness, although the | |
| * generated text usually doesn't make sense. Higher index sizes (up to 5) | |
| * produce more correct text, at the price of less randomness. | |
| * | |
| * @return string | |
| */ | |
| public function realText($maxNbChars = 200, $indexSize = 2) | |
| { | |
| return $this->realTextBetween((int) round($maxNbChars * 0.8), $maxNbChars, $indexSize); | |
| } | |
| /** | |
| * Generate a text string by the Markov chain algorithm. | |
| * | |
| * Depending on the $maxNbChars, returns a random valid looking text. The algorithm | |
| * generates a weighted table with the specified number of words as the index and the | |
| * possible following words as the value. | |
| * | |
| * @example 'Alice, swallowing down her flamingo, and began by taking the little golden key' | |
| * | |
| * @param int $minNbChars Minimum number of characters the text should contain (maximum: 8) | |
| * @param int $maxNbChars Maximum number of characters the text should contain (minimum: 10) | |
| * @param int $indexSize Determines how many words are considered for the generation of the next word. | |
| * The minimum is 1, and it produces a higher level of randomness, although the | |
| * generated text usually doesn't make sense. Higher index sizes (up to 5) | |
| * produce more correct text, at the price of less randomness. | |
| * | |
| * @return string | |
| */ | |
| public function realTextBetween($minNbChars = 160, $maxNbChars = 200, $indexSize = 2) | |
| { | |
| if ($minNbChars < 1) { | |
| throw new \InvalidArgumentException('minNbChars must be at least 1'); | |
| } | |
| if ($maxNbChars < 10) { | |
| throw new \InvalidArgumentException('maxNbChars must be at least 10'); | |
| } | |
| if ($indexSize < 1) { | |
| throw new \InvalidArgumentException('indexSize must be at least 1'); | |
| } | |
| if ($indexSize > 5) { | |
| throw new \InvalidArgumentException('indexSize must be at most 5'); | |
| } | |
| if ($minNbChars >= $maxNbChars) { | |
| throw new \InvalidArgumentException('minNbChars must be smaller than maxNbChars'); | |
| } | |
| $words = $this->getConsecutiveWords($indexSize); | |
| $iterations = 0; | |
| do { | |
| ++$iterations; | |
| if ($iterations >= 100) { | |
| throw new \OverflowException(sprintf('Maximum retries of %d reached without finding a valid real text', $iterations)); | |
| } | |
| $result = $this->generateText($maxNbChars, $words); | |
| } while (static::strlen($result) <= $minNbChars); | |
| return $result; | |
| } | |
| /** | |
| * @param int $maxNbChars | |
| * @param array $words | |
| * | |
| * @return string | |
| */ | |
| protected function generateText($maxNbChars, $words) | |
| { | |
| $result = []; | |
| $resultLength = 0; | |
| // take a random starting point | |
| $next = static::randomKey($words); | |
| while ($resultLength < $maxNbChars && isset($words[$next])) { | |
| // fetch a random word to append | |
| $word = static::randomElement($words[$next]); | |
| // calculate next index | |
| $currentWords = static::explode($next); | |
| $currentWords[] = $word; | |
| array_shift($currentWords); | |
| $next = static::implode($currentWords); | |
| // ensure text starts with an uppercase letter | |
| if ($resultLength == 0 && !static::validStart($word)) { | |
| continue; | |
| } | |
| // append the element | |
| $result[] = $word; | |
| $resultLength += static::strlen($word) + static::$separatorLen; | |
| } | |
| // remove the element that caused the text to overflow | |
| array_pop($result); | |
| // build result | |
| $result = static::implode($result); | |
| return static::appendEnd($result); | |
| } | |
| protected function getConsecutiveWords($indexSize) | |
| { | |
| if (!isset($this->consecutiveWords[$indexSize])) { | |
| $parts = $this->getExplodedText(); | |
| $words = []; | |
| $index = []; | |
| for ($i = 0; $i < $indexSize; ++$i) { | |
| $index[] = array_shift($parts); | |
| } | |
| for ($i = 0, $count = count($parts); $i < $count; ++$i) { | |
| $stringIndex = static::implode($index); | |
| if (!isset($words[$stringIndex])) { | |
| $words[$stringIndex] = []; | |
| } | |
| $word = $parts[$i]; | |
| $words[$stringIndex][] = $word; | |
| array_shift($index); | |
| $index[] = $word; | |
| } | |
| // cache look up words for performance | |
| $this->consecutiveWords[$indexSize] = $words; | |
| } | |
| return $this->consecutiveWords[$indexSize]; | |
| } | |
| protected function getExplodedText() | |
| { | |
| if ($this->explodedText === null) { | |
| $this->explodedText = static::explode(preg_replace('/\s+/u', ' ', static::$baseText)); | |
| } | |
| return $this->explodedText; | |
| } | |
| protected static function explode($text) | |
| { | |
| return explode(static::$separator, $text); | |
| } | |
| protected static function implode($words) | |
| { | |
| return implode(static::$separator, $words); | |
| } | |
| protected static function strlen($text) | |
| { | |
| return function_exists('mb_strlen') ? mb_strlen($text, 'UTF-8') : strlen($text); | |
| } | |
| protected static function validStart($word) | |
| { | |
| $isValid = true; | |
| if (static::$textStartsWithUppercase) { | |
| $isValid = preg_match('/^\p{Lu}/u', $word); | |
| } | |
| return $isValid; | |
| } | |
| protected static function appendEnd($text) | |
| { | |
| return preg_replace("/([ ,-:;\x{2013}\x{2014}]+$)/us", '', $text) . '.'; | |
| } | |
| } | |
Xet Storage Details
- Size:
- 6.81 kB
- Xet hash:
- f7eb3d29ffaafd2edbb84f76c4ae40a83a2f7b85863579ccf9363dd153e15b22
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.