PNG  IHDRxsBIT|d pHYs+tEXtSoftwarewww.inkscape.org<,tEXtComment File Manager

File Manager

Path: /opt/cloudlinux/venv/lib64/python3.11/site-packages/charset_normalizer/cli/

Viewing File: normalizer.py

import argparse
import sys
from json import dumps
from os.path import abspath
from platform import python_version
from typing import List, Optional

try:
    from unicodedata2 import unidata_version
except ImportError:
    from unicodedata import unidata_version

from charset_normalizer import from_fp
from charset_normalizer.models import CliDetectionResult
from charset_normalizer.version import __version__


def query_yes_no(question: str, default: str = "yes") -> bool:
    """Ask a yes/no question via input() and return their answer.

    "question" is a string that is presented to the user.
    "default" is the presumed answer if the user just hits <Enter>.
        It must be "yes" (the default), "no" or None (meaning
        an answer is required of the user).

    The "answer" return value is True for "yes" or False for "no".

    Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input
    """
    valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
    if default is None:
        prompt = " [y/n] "
    elif default == "yes":
        prompt = " [Y/n] "
    elif default == "no":
        prompt = " [y/N] "
    else:
        raise ValueError("invalid default answer: '%s'" % default)

    while True:
        sys.stdout.write(question + prompt)
        choice = input().lower()
        if default is not None and choice == "":
            return valid[default]
        elif choice in valid:
            return valid[choice]
        else:
            sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n")


def cli_detect(argv: Optional[List[str]] = None) -> int:
    """
    CLI assistant using ARGV and ArgumentParser
    :param argv:
    :return: 0 if everything is fine, anything else equal trouble
    """
    parser = argparse.ArgumentParser(
        description="The Real First Universal Charset Detector. "
        "Discover originating encoding used on text file. "
        "Normalize text to unicode."
    )

    parser.add_argument(
        "files", type=argparse.FileType("rb"), nargs="+", help="File(s) to be analysed"
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        default=False,
        dest="verbose",
        help="Display complementary information about file if any. "
        "Stdout will contain logs about the detection process.",
    )
    parser.add_argument(
        "-a",
        "--with-alternative",
        action="store_true",
        default=False,
        dest="alternatives",
        help="Output complementary possibilities if any. Top-level JSON WILL be a list.",
    )
    parser.add_argument(
        "-n",
        "--normalize",
        action="store_true",
        default=False,
        dest="normalize",
        help="Permit to normalize input file. If not set, program does not write anything.",
    )
    parser.add_argument(
        "-m",
        "--minimal",
        action="store_true",
        default=False,
        dest="minimal",
        help="Only output the charset detected to STDOUT. Disabling JSON output.",
    )
    parser.add_argument(
        "-r",
        "--replace",
        action="store_true",
        default=False,
        dest="replace",
        help="Replace file when trying to normalize it instead of creating a new one.",
    )
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        default=False,
        dest="force",
        help="Replace file without asking if you are sure, use this flag with caution.",
    )
    parser.add_argument(
        "-t",
        "--threshold",
        action="store",
        default=0.2,
        type=float,
        dest="threshold",
        help="Define a custom maximum amount of chaos allowed in decoded content. 0. <= chaos <= 1.",
    )
    parser.add_argument(
        "--version",
        action="version",
        version="Charset-Normalizer {} - Python {} - Unicode {}".format(
            __version__, python_version(), unidata_version
        ),
        help="Show version information and exit.",
    )

    args = parser.parse_args(argv)

    if args.replace is True and args.normalize is False:
        print("Use --replace in addition of --normalize only.", file=sys.stderr)
        return 1

    if args.force is True and args.replace is False:
        print("Use --force in addition of --replace only.", file=sys.stderr)
        return 1

    if args.threshold < 0.0 or args.threshold > 1.0:
        print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr)
        return 1

    x_ = []

    for my_file in args.files:

        matches = from_fp(my_file, threshold=args.threshold, explain=args.verbose)

        best_guess = matches.best()

        if best_guess is None:
            print(
                'Unable to identify originating encoding for "{}". {}'.format(
                    my_file.name,
                    "Maybe try increasing maximum amount of chaos."
                    if args.threshold < 1.0
                    else "",
                ),
                file=sys.stderr,
            )
            x_.append(
                CliDetectionResult(
                    abspath(my_file.name),
                    None,
                    [],
                    [],
                    "Unknown",
                    [],
                    False,
                    1.0,
                    0.0,
                    None,
                    True,
                )
            )
        else:
            x_.append(
                CliDetectionResult(
                    abspath(my_file.name),
                    best_guess.encoding,
                    best_guess.encoding_aliases,
                    [
                        cp
                        for cp in best_guess.could_be_from_charset
                        if cp != best_guess.encoding
                    ],
                    best_guess.language,
                    best_guess.alphabets,
                    best_guess.bom,
                    best_guess.percent_chaos,
                    best_guess.percent_coherence,
                    None,
                    True,
                )
            )

            if len(matches) > 1 and args.alternatives:
                for el in matches:
                    if el != best_guess:
                        x_.append(
                            CliDetectionResult(
                                abspath(my_file.name),
                                el.encoding,
                                el.encoding_aliases,
                                [
                                    cp
                                    for cp in el.could_be_from_charset
                                    if cp != el.encoding
                                ],
                                el.language,
                                el.alphabets,
                                el.bom,
                                el.percent_chaos,
                                el.percent_coherence,
                                None,
                                False,
                            )
                        )

            if args.normalize is True:

                if best_guess.encoding.startswith("utf") is True:
                    print(
                        '"{}" file does not need to be normalized, as it already came from unicode.'.format(
                            my_file.name
                        ),
                        file=sys.stderr,
                    )
                    if my_file.closed is False:
                        my_file.close()
                    continue

                o_: List[str] = my_file.name.split(".")

                if args.replace is False:
                    o_.insert(-1, best_guess.encoding)
                    if my_file.closed is False:
                        my_file.close()
                elif (
                    args.force is False
                    and query_yes_no(
                        'Are you sure to normalize "{}" by replacing it ?'.format(
                            my_file.name
                        ),
                        "no",
                    )
                    is False
                ):
                    if my_file.closed is False:
                        my_file.close()
                    continue

                try:
                    x_[0].unicode_path = abspath("./{}".format(".".join(o_)))

                    with open(x_[0].unicode_path, "w", encoding="utf-8") as fp:
                        fp.write(str(best_guess))
                except IOError as e:
                    print(str(e), file=sys.stderr)
                    if my_file.closed is False:
                        my_file.close()
                    return 2

        if my_file.closed is False:
            my_file.close()

    if args.minimal is False:
        print(
            dumps(
                [el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__,
                ensure_ascii=True,
                indent=4,
            )
        )
    else:
        for my_file in args.files:
            print(
                ", ".join(
                    [
                        el.encoding or "undefined"
                        for el in x_
                        if el.path == abspath(my_file.name)
                    ]
                )
            )

    return 0


if __name__ == "__main__":
    cli_detect()
b IDATxytVսϓ22 A@IR :hCiZ[v*E:WũZA ^dQeQ @ !jZ'>gsV仿$|?g)&x-EIENT ;@xT.i%-X}SvS5.r/UHz^_$-W"w)Ɗ/@Z &IoX P$K}JzX:;` &, ŋui,e6mX ԵrKb1ԗ)DADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADA݀!I*]R;I2$eZ#ORZSrr6mteffu*((Pu'v{DIߔ4^pIm'77WEEE;vƎ4-$]'RI{\I&G :IHJ DWBB=\WR޽m o$K(V9ABB.}jѢv`^?IOȅ} ڶmG}T#FJ`56$-ھ}FI&v;0(h;Б38CӧOWf!;A i:F_m9s&|q%=#wZprrrla A &P\\СC[A#! {olF} `E2}MK/vV)i{4BffV\|ۭX`b@kɶ@%i$K z5zhmX[IXZ` 'b%$r5M4º/l ԃߖxhʔ)[@=} K6IM}^5k㏷݆z ΗÿO:gdGBmyT/@+Vɶ纽z񕏵l.y޴it뭷zV0[Y^>Wsqs}\/@$(T7f.InݺiR$푔n.~?H))\ZRW'Mo~v Ov6oԃxz! S,&xm/yɞԟ?'uaSѽb,8GלKboi&3t7Y,)JJ c[nzӳdE&KsZLӄ I?@&%ӟ۶mSMMњ0iؐSZ,|J+N ~,0A0!5%Q-YQQa3}$_vVrf9f?S8`zDADADADADADADADADAdqP,تmMmg1V?rSI꒟]u|l RCyEf٢9 jURbztѰ!m5~tGj2DhG*{H9)꒟ר3:(+3\?/;TUݭʴ~S6lڧUJ*i$d(#=Yݺd{,p|3B))q:vN0Y.jkק6;SɶVzHJJЀ-utѹսk>QUU\޲~]fFnK?&ߡ5b=z9)^|u_k-[y%ZNU6 7Mi:]ۦtk[n X(e6Bb."8cۭ|~teuuw|ήI-5"~Uk;ZicEmN/:]M> cQ^uiƞ??Ңpc#TUU3UakNwA`:Y_V-8.KKfRitv޲* 9S6ֿj,ՃNOMߤ]z^fOh|<>@Å5 _/Iu?{SY4hK/2]4%it5q]GGe2%iR| W&f*^]??vq[LgE_3f}Fxu~}qd-ږFxu~I N>\;͗O֊:̗WJ@BhW=y|GgwܷH_NY?)Tdi'?խwhlmQi !SUUsw4kӺe4rfxu-[nHtMFj}H_u~w>)oV}(T'ebʒv3_[+vn@Ȭ\S}ot}w=kHFnxg S 0eޢm~l}uqZfFoZuuEg `zt~? b;t%>WTkķh[2eG8LIWx,^\thrl^Ϊ{=dž<}qV@ ⠨Wy^LF_>0UkDuʫuCs$)Iv:IK;6ֲ4{^6եm+l3>݆uM 9u?>Zc }g~qhKwڭeFMM~pМuqǿz6Tb@8@Y|jx](^]gf}M"tG -w.@vOqh~/HII`S[l.6nØXL9vUcOoB\xoǤ'T&IǍQw_wpv[kmO{w~>#=P1Pɞa-we:iǏlHo׈꒟f9SzH?+shk%Fs:qVhqY`jvO'ρ?PyX3lх]˾uV{ݞ]1,MzYNW~̈́ joYn}ȚF߾׮mS]F z+EDxm/d{F{-W-4wY듏:??_gPf ^3ecg ҵs8R2מz@TANGj)}CNi/R~}c:5{!ZHӋӾ6}T]G]7W6^n 9*,YqOZj:P?Q DFL|?-^.Ɵ7}fFh׶xe2Pscz1&5\cn[=Vn[ĶE鎀uˌd3GII k;lNmشOuuRVfBE]ۣeӶu :X-[(er4~LHi6:Ѻ@ԅrST0trk%$Č0ez" *z"T/X9|8.C5Feg}CQ%͞ˣJvL/?j^h&9xF`њZ(&yF&Iݻfg#W;3^{Wo^4'vV[[K';+mӍִ]AC@W?1^{එyh +^]fm~iԵ]AB@WTk̏t uR?l.OIHiYyԶ]Aˀ7c:q}ힽaf6Z~қm(+sK4{^6}T*UUu]n.:kx{:2 _m=sAߤU@?Z-Vކеz왍Nэ{|5 pڶn b p-@sPg]0G7fy-M{GCF'%{4`=$-Ge\ eU:m+Zt'WjO!OAF@ik&t݆ϥ_ e}=]"Wz_.͜E3leWFih|t-wZۍ-uw=6YN{6|} |*={Ѽn.S.z1zjۻTH]흾 DuDvmvK.`V]yY~sI@t?/ϓ. m&["+P?MzovVЫG3-GRR[(!!\_,^%?v@ҵő m`Y)tem8GMx.))A]Y i`ViW`?^~!S#^+ѽGZj?Vģ0.))A꨷lzL*]OXrY`DBBLOj{-MH'ii-ϰ ok7^ )쭡b]UXSְmռY|5*cֽk0B7镹%ڽP#8nȎq}mJr23_>lE5$iwui+ H~F`IjƵ@q \ @#qG0".0" l`„.0! ,AQHN6qzkKJ#o;`Xv2>,tێJJ7Z/*A .@fفjMzkg @TvZH3Zxu6Ra'%O?/dQ5xYkU]Rֽkق@DaS^RSּ5|BeHNN͘p HvcYcC5:y #`οb;z2.!kr}gUWkyZn=f Pvsn3p~;4p˚=ē~NmI] ¾ 0lH[_L hsh_ғߤc_њec)g7VIZ5yrgk̞W#IjӪv>՞y睝M8[|]\շ8M6%|@PZڨI-m>=k='aiRo-x?>Q.}`Ȏ:Wsmu u > .@,&;+!!˱tﭧDQwRW\vF\~Q7>spYw$%A~;~}6¾ g&if_=j,v+UL1(tWake:@Ș>j$Gq2t7S?vL|]u/ .(0E6Mk6hiۺzښOrifޱxm/Gx> Lal%%~{lBsR4*}{0Z/tNIɚpV^#Lf:u@k#RSu =S^ZyuR/.@n&΃z~B=0eg뺆#,Þ[B/?H uUf7y Wy}Bwegל`Wh(||`l`.;Ws?V@"c:iɍL֯PGv6zctM̠':wuW;d=;EveD}9J@B(0iհ bvP1{\P&G7D޴Iy_$-Qjm~Yrr&]CDv%bh|Yzni_ˆR;kg}nJOIIwyuL}{ЌNj}:+3Y?:WJ/N+Rzd=hb;dj͒suݔ@NKMԄ jqzC5@y°hL m;*5ezᕏ=ep XL n?מ:r`۵tŤZ|1v`V뽧_csج'ߤ%oTuumk%%%h)uy]Nk[n 'b2 l.=͜E%gf$[c;s:V-͞WߤWh-j7]4=F-X]>ZLSi[Y*We;Zan(ӇW|e(HNNP5[= r4tP &0<pc#`vTNV GFqvTi*Tyam$ߏWyE*VJKMTfFw>'$-ؽ.Ho.8c"@DADADADADADADADADA~j*֘,N;Pi3599h=goضLgiJ5փy~}&Zd9p֚ e:|hL``b/d9p? fgg+%%hMgXosج, ΩOl0Zh=xdjLmhݻoO[g_l,8a]٭+ӧ0$I]c]:粹:Teꢢ"5a^Kgh,&= =՟^߶“ߢE ܹS J}I%:8 IDAT~,9/ʃPW'Mo}zNƍ쨓zPbNZ~^z=4mswg;5 Y~SVMRXUյڱRf?s:w ;6H:ºi5-maM&O3;1IKeamZh͛7+##v+c ~u~ca]GnF'ټL~PPPbn voC4R,ӟgg %hq}@#M4IÇ Oy^xMZx ) yOw@HkN˖-Sǎmb]X@n+i͖!++K3gd\$mt$^YfJ\8PRF)77Wא!Cl$i:@@_oG I{$# 8磌ŋ91A (Im7֭>}ߴJq7ޗt^ -[ԩSj*}%]&' -ɓ'ꫯVzzvB#;a 7@GxI{j޼ƌ.LÇWBB7`O"I$/@R @eee@۷>}0,ɒ2$53Xs|cS~rpTYYY} kHc %&k.], @ADADADADADADADADA@lT<%''*Lo^={رc5h %$+CnܸQ3fҥK}vUVVs9G R,_{xˇ3o߾;TTTd}馛]uuuG~iԩ@4bnvmvfϞ /Peeeq}}za I~,誫{UWW뮻}_~YƍSMMMYχ֝waw\ďcxꩧtEƍկ_?۷5@u?1kNׯWzz/wy>}zj3 k(ٺuq_Zvf̘:~ ABQ&r|!%KҥKgԞ={<_X-z !CyFUUz~ ABQIIIjݺW$UXXDٳZ~ ABQƍecW$<(~<RSSvZujjjԧOZQu@4 8m&&&jԩg$ď1h ͟?_{768@g =@`)))5o6m3)ѣƌJ;wҿUTT /KZR{~a=@0o<*狔iFɶ[ˎ;T]]OX@?K.ۈxN pppppppppppppppppPfl߾] ,{ァk۶mڿo5BTӦMӴiӴ|r DB2e|An!Dy'tkΝ[A $***t5' "!駟oaDnΝ:t֭[gDШQ06qD;@ x M6v(PiizmZ4ew"@̴ixf [~-Fٱc&IZ2|n!?$@{[HTɏ#@hȎI# _m(F /6Z3z'\r,r!;w2Z3j=~GY7"I$iI.p_"?pN`y DD?: _  Gÿab7J !Bx@0 Bo cG@`1C[@0G @`0C_u V1 aCX>W ` | `!<S `"<. `#c`?cAC4 ?c p#~@0?:08&_MQ1J h#?/`7;I  q 7a wQ A 1 Hp !#<8/#@1Ul7=S=K.4Z?E_$i@!1!E4?`P_  @Bă10#: "aU,xbFY1 [n|n #'vEH:`xb #vD4Y hi.i&EΖv#O H4IŶ}:Ikh @tZRF#(tXҙzZ ?I3l7q@õ|ۍ1,GpuY Ꮿ@hJv#xxk$ v#9 5 }_$c S#=+"K{F*m7`#%H:NRSp6I?sIՖ{Ap$I$I:QRv2$Z @UJ*$]<FO4IENDB`