package URI::_punycode; use strict; use warnings; our $VERSION = '1.73'; $VERSION = eval $VERSION; use Exporter 'import'; our @EXPORT = qw(encode_punycode decode_punycode); use integer; our $DEBUG = 0; use constant BASE => 36; use constant TMIN => 1; use constant TMAX => 26; use constant SKEW => 38; use constant DAMP => 700; use constant INITIAL_BIAS => 72; use constant INITIAL_N => 128; my $Delimiter = chr 0x2D; my $BasicRE = qr/[\x00-\x7f]/; sub _croak { require Carp; Carp::croak(@_); } sub digit_value { my $code = shift; return ord($code) - ord("A") if $code =~ /[A-Z]/; return ord($code) - ord("a") if $code =~ /[a-z]/; return ord($code) - ord("0") + 26 if $code =~ /[0-9]/; return; } sub code_point { my $digit = shift; return $digit + ord('a') if 0 <= $digit && $digit <= 25; return $digit + ord('0') - 26 if 26 <= $digit && $digit <= 36; die 'NOT COME HERE'; } sub adapt { my($delta, $numpoints, $firsttime) = @_; $delta = $firsttime ? $delta / DAMP : $delta / 2; $delta += $delta / $numpoints; my $k = 0; while ($delta > ((BASE - TMIN) * TMAX) / 2) { $delta /= BASE - TMIN; $k += BASE; } return $k + (((BASE - TMIN + 1) * $delta) / ($delta + SKEW)); } sub decode_punycode { my $code = shift; my $n = INITIAL_N; my $i = 0; my $bias = INITIAL_BIAS; my @output; if ($code =~ s/(.*)$Delimiter//o) { push @output, map ord, split //, $1; return _croak('non-basic code point') unless $1 =~ /^$BasicRE*$/o; } while ($code) { my $oldi = $i; my $w = 1; LOOP: for (my $k = BASE; 1; $k += BASE) { my $cp = substr($code, 0, 1, ''); my $digit = digit_value($cp); defined $digit or return _croak("invalid punycode input"); $i += $digit * $w; my $t = ($k <= $bias) ? TMIN : ($k >= $bias + TMAX) ? TMAX : $k - $bias; last LOOP if $digit < $t; $w *= (BASE - $t); } $bias = adapt($i - $oldi, @output + 1, $oldi == 0); warn "bias becomes $bias" if $DEBUG; $n += $i / (@output + 1); $i = $i % (@output + 1); splice(@output, $i, 0, $n); warn join " ", map sprintf('%04x', $_), @output if $DEBUG; $i++; } return join '', map chr, @output; } sub encode_punycode { my $input = shift; my @input = split //, $input; my $n = INITIAL_N; my $delta = 0; my $bias = INITIAL_BIAS; my @output; my @basic = grep /$BasicRE/, @input; my $h = my $b = @basic; push @output, @basic; push @output, $Delimiter if $b && $h < @input; warn "basic codepoints: (@output)" if $DEBUG; while ($h < @input) { my $m = min(grep { $_ >= $n } map ord, @input); warn sprintf "next code point to insert is %04x", $m if $DEBUG; $delta += ($m - $n) * ($h + 1); $n = $m; for my $i (@input) { my $c = ord($i); $delta++ if $c < $n; if ($c == $n) { my $q = $delta; LOOP: for (my $k = BASE; 1; $k += BASE) { my $t = ($k <= $bias) ? TMIN : ($k >= $bias + TMAX) ? TMAX : $k - $bias; last LOOP if $q < $t; my $cp = code_point($t + (($q - $t) % (BASE - $t))); push @output, chr($cp); $q = ($q - $t) / (BASE - $t); } push @output, chr(code_point($q)); $bias = adapt($delta, $h + 1, $h == $b); warn "bias becomes $bias" if $DEBUG; $delta = 0; $h++; } } $delta++; $n++; } return join '', @output; } sub min { my $min = shift; for (@_) { $min = $_ if $_ <= $min } return $min; } 1; __END__ =encoding utf8 =head1 NAME URI::_punycode - encodes Unicode string in Punycode =head1 SYNOPSIS use strict; use warnings; use utf8; use URI::_punycode qw(encode_punycode decode_punycode); # encode a unicode string my $punycode = encode_punycode('http://☃.net'); # http://.net-xc8g $punycode = encode_punycode('bücher'); # bcher-kva $punycode = encode_punycode('他们为什么不说中文'); # ihqwcrb4cv8a8dqg056pqjye # decode a punycode string back into a unicode string my $unicode = decode_punycode('http://.net-xc8g'); # http://☃.net $unicode = decode_punycode('bcher-kva'); # bücher $unicode = decode_punycode('ihqwcrb4cv8a8dqg056pqjye'); # 他们为什么不说中文 =head1 DESCRIPTION L<URI::_punycode> is a module to encode / decode Unicode strings into L<Punycode|https://tools.ietf.org/html/rfc3492>, an efficient encoding of Unicode for use with L<IDNA|https://tools.ietf.org/html/rfc5890>. =head1 FUNCTIONS All functions throw exceptions on failure. You can C<catch> them with L<Syntax::Keyword::Try> or L<Try::Tiny>. The following functions are exported by default. =head2 encode_punycode my $punycode = encode_punycode('http://☃.net'); # http://.net-xc8g $punycode = encode_punycode('bücher'); # bcher-kva $punycode = encode_punycode('他们为什么不说中文') # ihqwcrb4cv8a8dqg056pqjye Takes a Unicode string (UTF8-flagged variable) and returns a Punycode encoding for it. =head2 decode_punycode my $unicode = decode_punycode('http://.net-xc8g'); # http://☃.net $unicode = decode_punycode('bcher-kva'); # bücher $unicode = decode_punycode('ihqwcrb4cv8a8dqg056pqjye'); # 他们为什么不说中文 Takes a Punycode encoding and returns original Unicode string. =head1 AUTHOR Tatsuhiko Miyagawa <F<miyagawa@bulknews.net>> is the author of L<IDNA::Punycode> which was the basis for this module. =head1 SEE ALSO L<IDNA::Punycode>, L<RFC 3492|https://tools.ietf.org/html/rfc3492>, L<RFC 5891|https://tools.ietf.org/html/rfc5891> =head1 COPYRIGHT AND LICENSE This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut
Name | Type | Size | Permission | Actions |
---|---|---|---|---|
file | Folder | 0755 |
|
|
urn | Folder | 0755 |
|
|
Escape.pm | File | 6.9 KB | 0644 |
|
Heuristic.pm | File | 6.37 KB | 0644 |
|
IRI.pm | File | 820 B | 0644 |
|
QueryParam.pm | File | 4.77 KB | 0644 |
|
Split.pm | File | 2.32 KB | 0644 |
|
URL.pm | File | 5.36 KB | 0644 |
|
WithBase.pm | File | 3.77 KB | 0644 |
|
_foreign.pm | File | 133 B | 0644 |
|
_generic.pm | File | 5.71 KB | 0644 |
|
_idna.pm | File | 2.06 KB | 0644 |
|
_ldap.pm | File | 3.2 KB | 0644 |
|
_login.pm | File | 257 B | 0644 |
|
_punycode.pm | File | 5.52 KB | 0644 |
|
_query.pm | File | 2.5 KB | 0644 |
|
_segment.pm | File | 442 B | 0644 |
|
_server.pm | File | 3.66 KB | 0644 |
|
_userpass.pm | File | 1.04 KB | 0644 |
|
data.pm | File | 3.34 KB | 0644 |
|
file.pm | File | 9.53 KB | 0644 |
|
ftp.pm | File | 1.06 KB | 0644 |
|
gopher.pm | File | 2.4 KB | 0644 |
|
http.pm | File | 451 B | 0644 |
|
https.pm | File | 170 B | 0644 |
|
ldap.pm | File | 2.88 KB | 0644 |
|
ldapi.pm | File | 467 B | 0644 |
|
ldaps.pm | File | 170 B | 0644 |
|
mailto.pm | File | 1.27 KB | 0644 |
|
mms.pm | File | 151 B | 0644 |
|
news.pm | File | 1.45 KB | 0644 |
|
nntp.pm | File | 153 B | 0644 |
|
pop.pm | File | 1.2 KB | 0644 |
|
rlogin.pm | File | 155 B | 0644 |
|
rsync.pm | File | 233 B | 0644 |
|
rtsp.pm | File | 151 B | 0644 |
|
rtspu.pm | File | 152 B | 0644 |
|
sftp.pm | File | 124 B | 0644 |
|
sip.pm | File | 1.69 KB | 0644 |
|
sips.pm | File | 169 B | 0644 |
|
snews.pm | File | 198 B | 0644 |
|
ssh.pm | File | 201 B | 0644 |
|
telnet.pm | File | 154 B | 0644 |
|
tn3270.pm | File | 154 B | 0644 |
|
urn.pm | File | 2.15 KB | 0644 |
|