OpenForum RSS: regexp + UTF-8 = ��

regexp��+ UTF-8 = �� (oklas)

Thu, 21 Aug 2014 06:53:04 GMT

- �� utf8 � �� ,
� � �� .

- �� , ��
�� (�� ).

- �� , �� .

- ��
� �� .

- "�� " - ��
�� google yandex rambler � �.�.

regexp��+ UTF-8 = �� (��)

Thu, 21 Aug 2014 05:47:09 GMT

� �� setlocale? � � �� ӣ �� .. �� , � �� , �� .�. �� ... �� perl � �� , �� !!! �� !!! �� ... �� , �� !

regexp��+ UTF-8 = �� (��)

Thu, 21 Aug 2014 05:38:03 GMT

>[�� ]
> $bytes = encode('utf-8', $string); # � ��, (�� )
> $string = decode('utf-8', $bytes); # ��
> ! ��, �� perl �� , ��
> �� utf-8 � print'� � �� utf-8,
> �� decode( 'utf-8',
> ... ), �� -�� (�� socket'� �� fcgi, ��
> ��-�� .�.) �� utf-8 ��
> �� .
> �� :
> open FH, "<:utf8", $filename; # �� "perldoc -f open"

�� ... �� , �� -�� ! �� ... � �� ӣ �� ctype ��... � �� ! �ݣ ��

regexp��+ UTF-8 = �� (oklas)

Tue, 03 Dec 2013 11:53:24 GMT

� �� , � �� , �� length � �.�.

�� :

use utf8; # �� , �� -ASCII ��,
��

use Encode; # �� .

$bytes = encode('utf-8', $string); # � ��, (�� )
$string = decode('utf-8', $bytes); # ��

! ��, �� perl �� , �� utf-8 � print'� � �� utf-8, �� decode( 'utf-8', ... ), �� -�� (�� socket'� �� fcgi, �� -�� .�.) �� utf-8 �� .

regexp��+ UTF-8 = �� (angra)

Thu, 12 Mar 2009 23:14:16 GMT

�� . �� utf8::upgrade($infile); �� my $infile �� . �� use strict �� .
�� :
#!/usr/bin/perl -w
use strict;

use utf8;

open(DATA,'file.txt');
my $infile = <DATA>;
utf8::decode($infile);
close DATA;

while($infile =~ m/([�-��-�]+)/gi) {
my $s=$1;
#�� encode � $1 ��
utf8::encode($s);
print "$s\n";
}

� �� (�� ) �� , �� _��_ �� /��.

regexp��+ UTF-8 = �� (�� )

Thu, 12 Mar 2009 22:52:44 GMT

>man perluniintro
>man perlunifaq
>man perlunitut
>man perlunicode
>��
>use utf8;
>use open ':encoding(utf8)';
>utf8::upgrade($infile);
>�� , �� .

��, �� :
$ less report.pl
#!/usr/bin/perl

use utf8;
use open ':encoding(utf8)';
utf8::upgrade($infile);

open(DATA,'file.txt');
my $infile = <DATA>;
close DATA;

while($infile =~ m/([�-��-�]+)/gi) {
print "$1\n";
}

$ less file.txt
"��" "�� "

$ ./report.pl
Wide character in print at page.analyser_simplified.pl line 12.
��
Wide character in print at page.analyser_simplified.pl line 12.
��
Wide character in print at page.analyser_simplified.pl line 12.
��

��!! ��. �� wide character.
�� . ��.

regexp��+ UTF-8 = �� (gaa)

Thu, 12 Mar 2009 22:25:20 GMT

>>� �� .
>>��-�� [[:alnum:]] �� \w, �� .
>�� , �� .

� �� \w � [[:alpha:]] �� . �� .

regexp��+ UTF-8 = �� (angra)

Thu, 12 Mar 2009 21:54:52 GMT

>� �� .
>��-�� [[:alnum:]] �� \w, �� .

�� , �� . �� , � � \w �� .
�� isalpha �� , � �� utf8, �� use locale.

regexp��+ UTF-8 = �� (gaa)

Thu, 12 Mar 2009 18:46:49 GMT

>�� :
>#!/usr/bin/perl

� �� Σ�?

� �� . ��-�� [[:alnum:]] �� \w, �� .

OpenForum RSS: regexp + UTF-8 = �� ��������

regexp��+ UTF-8 = �� �������� (oklas)

regexp��+ UTF-8 = �� �������� (��������)

regexp��+ UTF-8 = �� �������� (��������)

regexp��+ UTF-8 = �� �������� (oklas)

regexp��+ UTF-8 = �� �������� (angra)

regexp��+ UTF-8 = �� �������� (������ �� ������)

regexp��+ UTF-8 = �� �������� (gaa)

regexp��+ UTF-8 = �� �������� (angra)

regexp��+ UTF-8 = �� �������� (gaa)

OpenForum RSS: regexp + UTF-8 = ��

regexp��+ UTF-8 = �� (oklas)

regexp��+ UTF-8 = �� (��)

regexp��+ UTF-8 = �� (��)

regexp��+ UTF-8 = �� (oklas)

regexp��+ UTF-8 = �� (angra)

regexp��+ UTF-8 = �� (�� )

regexp��+ UTF-8 = �� (gaa)

regexp��+ UTF-8 = �� (angra)

regexp��+ UTF-8 = �� (gaa)