html的转义字符然后通过代码识别

发布时间:2022-03-15 15:31:46 作者:iii
来源:亿速云 阅读:127

这篇文章主要讲解了“html的转义字符然后通过代码识别”,文中的讲解内容简单清晰,易于学习与理解,下面请大家跟着小编的思路慢慢深入,一起来研究和学习“html的转义字符然后通过代码识别”吧!

  偶尔会在数据中看到诸如 &#39; 这样的字符,特征如下:

  以&#开头,中间是一串数字,以;结尾

  以&开头,中间一串字符,以;结尾

  比如最常见的 &nbsp; 或者等价的 &#160;

  浏览器遇到这些转义符,会转义回来,但如何通过代码识别?org.apache.commons.lang.StringEscapeUtils.unescapeHtml提供了很好的说明

  遇到上面的第一种情况,中间是数字的,直接将数字(unicode)转为char

  遇到第二种情况,中间是字符,只能查映射表了:从映射表中找到字符对应的数字,再转换为char。看看代码就一目了然了

  看看HTML40如何定义的

  // Static initializer: builds the shared HTML40 entity table when the class is
  // initialized, then fills it via fillWithHtml40Entities.
  static {
      HTML40 = new Entities();
      fillWithHtml40Entities(HTML40);
  }

  staticvoidfillWithHtml40Entities(Entitiesentities){

  entities.addEntities(BASIC_ARRAY);

  entities.addEntities(ISO8859_1_ARRAY);

  entities.addEntities(HTML40_ARRAY);

  }

  再看看BASIC_ARRAY、ISO8859_1_ARRAY、HTML40_ARRAY分别是什么

  BASIC_ARRAY

  // Basic markup entities. Each row is {entity name, decimal code point as a string}.
  private static final String[][] BASIC_ARRAY = {
      {"quot", "34"}, // " - double-quote
      {"amp", "38"}, // & - ampersand
      {"lt", "60"}, // < - less-than
      {"gt", "62"}, // > - greater-than
  };

  ISO8859_1_ARRAY

  // ISO 8859-1 (Latin-1) entities, code points 160-255 in ascending order.
  // Each row is {entity name, decimal code point as a string}.
  static final String[][] ISO8859_1_ARRAY = {
      {"nbsp", "160"}, // non-breaking space
      {"iexcl", "161"}, // inverted exclamation mark
      {"cent", "162"}, // cent sign
      {"pound", "163"}, // pound sign
      {"curren", "164"}, // currency sign
      {"yen", "165"}, // yen sign = yuan sign
      {"brvbar", "166"}, // broken bar = broken vertical bar
      {"sect", "167"}, // section sign
      {"uml", "168"}, // diaeresis = spacing diaeresis
      {"copy", "169"}, // © - copyright sign
      {"ordf", "170"}, // feminine ordinal indicator
      {"laquo", "171"}, // left-pointing double angle quotation mark = left pointing guillemet
      {"not", "172"}, // not sign
      {"shy", "173"}, // soft hyphen = discretionary hyphen
      {"reg", "174"}, // ® - registered trademark sign
      {"macr", "175"}, // macron = spacing macron = overline = APL overbar
      {"deg", "176"}, // degree sign
      {"plusmn", "177"}, // plus-minus sign = plus-or-minus sign
      {"sup2", "178"}, // superscript two = superscript digit two = squared
      {"sup3", "179"}, // superscript three = superscript digit three = cubed
      {"acute", "180"}, // acute accent = spacing acute
      {"micro", "181"}, // micro sign
      {"para", "182"}, // pilcrow sign = paragraph sign
      {"middot", "183"}, // middle dot = Georgian comma = Greek middle dot
      {"cedil", "184"}, // cedilla = spacing cedilla
      {"sup1", "185"}, // superscript one = superscript digit one
      {"ordm", "186"}, // masculine ordinal indicator
      {"raquo", "187"}, // right-pointing double angle quotation mark = right pointing guillemet
      {"frac14", "188"}, // vulgar fraction one quarter = fraction one quarter
      {"frac12", "189"}, // vulgar fraction one half = fraction one half
      {"frac34", "190"}, // vulgar fraction three quarters = fraction three quarters
      {"iquest", "191"}, // inverted question mark = turned question mark
      {"Agrave", "192"}, // À - uppercase A, grave accent
      {"Aacute", "193"}, // Á - uppercase A, acute accent
      {"Acirc", "194"}, // Â - uppercase A, circumflex accent
      {"Atilde", "195"}, // Ã - uppercase A, tilde
      {"Auml", "196"}, // Ä - uppercase A, umlaut
      {"Aring", "197"}, // Å - uppercase A, ring
      {"AElig", "198"}, // Æ - uppercase AE
      {"Ccedil", "199"}, // Ç - uppercase C, cedilla
      {"Egrave", "200"}, // È - uppercase E, grave accent
      {"Eacute", "201"}, // É - uppercase E, acute accent
      {"Ecirc", "202"}, // Ê - uppercase E, circumflex accent
      {"Euml", "203"}, // Ë - uppercase E, umlaut
      {"Igrave", "204"}, // Ì - uppercase I, grave accent
      {"Iacute", "205"}, // Í - uppercase I, acute accent
      {"Icirc", "206"}, // Î - uppercase I, circumflex accent
      {"Iuml", "207"}, // Ï - uppercase I, umlaut
      {"ETH", "208"}, // Ð - uppercase Eth, Icelandic
      {"Ntilde", "209"}, // Ñ - uppercase N, tilde
      {"Ograve", "210"}, // Ò - uppercase O, grave accent
      {"Oacute", "211"}, // Ó - uppercase O, acute accent
      {"Ocirc", "212"}, // Ô - uppercase O, circumflex accent
      {"Otilde", "213"}, // Õ - uppercase O, tilde
      {"Ouml", "214"}, // Ö - uppercase O, umlaut
      {"times", "215"}, // multiplication sign
      {"Oslash", "216"}, // Ø - uppercase O, slash
      {"Ugrave", "217"}, // Ù - uppercase U, grave accent
      {"Uacute", "218"}, // Ú - uppercase U, acute accent
      {"Ucirc", "219"}, // Û - uppercase U, circumflex accent
      {"Uuml", "220"}, // Ü - uppercase U, umlaut
      {"Yacute", "221"}, // Ý - uppercase Y, acute accent
      {"THORN", "222"}, // Þ - uppercase THORN, Icelandic
      {"szlig", "223"}, // ß - lowercase sharp s, German
      {"agrave", "224"}, // à - lowercase a, grave accent
      {"aacute", "225"}, // á - lowercase a, acute accent
      {"acirc", "226"}, // â - lowercase a, circumflex accent
      {"atilde", "227"}, // ã - lowercase a, tilde
      {"auml", "228"}, // ä - lowercase a, umlaut
      {"aring", "229"}, // å - lowercase a, ring
      {"aelig", "230"}, // æ - lowercase ae
      {"ccedil", "231"}, // ç - lowercase c, cedilla
      {"egrave", "232"}, // è - lowercase e, grave accent
      {"eacute", "233"}, // é - lowercase e, acute accent
      {"ecirc", "234"}, // ê - lowercase e, circumflex accent
      {"euml", "235"}, // ë - lowercase e, umlaut
      {"igrave", "236"}, // ì - lowercase i, grave accent
      {"iacute", "237"}, // í - lowercase i, acute accent
      {"icirc", "238"}, // î - lowercase i, circumflex accent
      {"iuml", "239"}, // ï - lowercase i, umlaut
      {"eth", "240"}, // ð - lowercase eth, Icelandic
      {"ntilde", "241"}, // ñ - lowercase n, tilde
      {"ograve", "242"}, // ò - lowercase o, grave accent
      {"oacute", "243"}, // ó - lowercase o, acute accent
      {"ocirc", "244"}, // ô - lowercase o, circumflex accent
      {"otilde", "245"}, // õ - lowercase o, tilde
      {"ouml", "246"}, // ö - lowercase o, umlaut
      {"divide", "247"}, // division sign
      {"oslash", "248"}, // ø - lowercase o, slash
      {"ugrave", "249"}, // ù - lowercase u, grave accent
      {"uacute", "250"}, // ú - lowercase u, acute accent
      {"ucirc", "251"}, // û - lowercase u, circumflex accent
      {"uuml", "252"}, // ü - lowercase u, umlaut
      {"yacute", "253"}, // ý - lowercase y, acute accent
      {"thorn", "254"}, // þ - lowercase thorn, Icelandic
      {"yuml", "255"}, // ÿ - lowercase y, umlaut
  };

  HTML40_ARRAY

  // Additional HTML 4.0 entities (symbols, Greek letters, punctuation, special characters).
  // Each row is {entity name, decimal code point as a string}.
  // Trailing tags such as ISOtech / ISOgrk3 / NEW are carried over from the original comments.
  static final String[][] HTML40_ARRAY = {
      // Latin Extended-B
      {"fnof", "402"}, // latin small f with hook = function = florin, U+0192 ISOtech
      // Greek
      {"Alpha", "913"}, // greek capital letter alpha, U+0391
      {"Beta", "914"}, // greek capital letter beta, U+0392
      {"Gamma", "915"}, // greek capital letter gamma, U+0393 ISOgrk3
      {"Delta", "916"}, // greek capital letter delta, U+0394 ISOgrk3
      {"Epsilon", "917"}, // greek capital letter epsilon, U+0395
      {"Zeta", "918"}, // greek capital letter zeta, U+0396
      {"Eta", "919"}, // greek capital letter eta, U+0397
      {"Theta", "920"}, // greek capital letter theta, U+0398 ISOgrk3
      {"Iota", "921"}, // greek capital letter iota, U+0399
      {"Kappa", "922"}, // greek capital letter kappa, U+039A
      {"Lambda", "923"}, // greek capital letter lambda, U+039B ISOgrk3
      {"Mu", "924"}, // greek capital letter mu, U+039C
      {"Nu", "925"}, // greek capital letter nu, U+039D
      {"Xi", "926"}, // greek capital letter xi, U+039E ISOgrk3
      {"Omicron", "927"}, // greek capital letter omicron, U+039F
      {"Pi", "928"}, // greek capital letter pi, U+03A0 ISOgrk3
      {"Rho", "929"}, // greek capital letter rho, U+03A1
      // there is no Sigmaf, and no U+03A2 character either
      {"Sigma", "931"}, // greek capital letter sigma, U+03A3 ISOgrk3
      {"Tau", "932"}, // greek capital letter tau, U+03A4
      {"Upsilon", "933"}, // greek capital letter upsilon, U+03A5 ISOgrk3
      {"Phi", "934"}, // greek capital letter phi, U+03A6 ISOgrk3
      {"Chi", "935"}, // greek capital letter chi, U+03A7
      {"Psi", "936"}, // greek capital letter psi, U+03A8 ISOgrk3
      {"Omega", "937"}, // greek capital letter omega, U+03A9 ISOgrk3
      {"alpha", "945"}, // greek small letter alpha, U+03B1 ISOgrk3
      {"beta", "946"}, // greek small letter beta, U+03B2 ISOgrk3
      {"gamma", "947"}, // greek small letter gamma, U+03B3 ISOgrk3
      {"delta", "948"}, // greek small letter delta, U+03B4 ISOgrk3
      {"epsilon", "949"}, // greek small letter epsilon, U+03B5 ISOgrk3
      {"zeta", "950"}, // greek small letter zeta, U+03B6 ISOgrk3
      {"eta", "951"}, // greek small letter eta, U+03B7 ISOgrk3
      {"theta", "952"}, // greek small letter theta, U+03B8 ISOgrk3
      {"iota", "953"}, // greek small letter iota, U+03B9 ISOgrk3
      {"kappa", "954"}, // greek small letter kappa, U+03BA ISOgrk3
      {"lambda", "955"}, // greek small letter lambda, U+03BB ISOgrk3
      {"mu", "956"}, // greek small letter mu, U+03BC ISOgrk3
      {"nu", "957"}, // greek small letter nu, U+03BD ISOgrk3
      {"xi", "958"}, // greek small letter xi, U+03BE ISOgrk3
      {"omicron", "959"}, // greek small letter omicron, U+03BF NEW
      {"pi", "960"}, // greek small letter pi, U+03C0 ISOgrk3
      {"rho", "961"}, // greek small letter rho, U+03C1 ISOgrk3
      {"sigmaf", "962"}, // greek small letter final sigma, U+03C2 ISOgrk3
      {"sigma", "963"}, // greek small letter sigma, U+03C3 ISOgrk3
      {"tau", "964"}, // greek small letter tau, U+03C4 ISOgrk3
      {"upsilon", "965"}, // greek small letter upsilon, U+03C5 ISOgrk3
      {"phi", "966"}, // greek small letter phi, U+03C6 ISOgrk3
      {"chi", "967"}, // greek small letter chi, U+03C7 ISOgrk3
      {"psi", "968"}, // greek small letter psi, U+03C8 ISOgrk3
      {"omega", "969"}, // greek small letter omega, U+03C9 ISOgrk3
      {"thetasym", "977"}, // greek small letter theta symbol, U+03D1 NEW
      {"upsih", "978"}, // greek upsilon with hook symbol, U+03D2 NEW
      {"piv", "982"}, // greek pi symbol, U+03D6 ISOgrk3
      // General Punctuation
      {"bull", "8226"}, // bullet = black small circle, U+2022 ISOpub
      // bullet is NOT the same as bullet operator, U+2219
      {"hellip", "8230"}, // horizontal ellipsis = three dot leader, U+2026 ISOpub
      {"prime", "8242"}, // prime = minutes = feet, U+2032 ISOtech
      {"Prime", "8243"}, // double prime = seconds = inches, U+2033 ISOtech
      {"oline", "8254"}, // overline = spacing overscore, U+203E NEW
      {"frasl", "8260"}, // fraction slash, U+2044 NEW
      // Letterlike Symbols
      {"weierp", "8472"}, // script capital P = power set = Weierstrass p, U+2118 ISOamso
      {"image", "8465"}, // blackletter capital I = imaginary part, U+2111 ISOamso
      {"real", "8476"}, // blackletter capital R = real part symbol, U+211C ISOamso
      {"trade", "8482"}, // trade mark sign, U+2122 ISOnum
      {"alefsym", "8501"}, // alef symbol = first transfinite cardinal, U+2135 NEW
      // alef symbol is NOT the same as hebrew letter alef, U+05D0 although the
      // same glyph could be used to depict both characters
      // Arrows
      {"larr", "8592"}, // leftwards arrow, U+2190 ISOnum
      {"uarr", "8593"}, // upwards arrow, U+2191 ISOnum
      {"rarr", "8594"}, // rightwards arrow, U+2192 ISOnum
      {"darr", "8595"}, // downwards arrow, U+2193 ISOnum
      {"harr", "8596"}, // left right arrow, U+2194 ISOamsa
      {"crarr", "8629"}, // downwards arrow with corner leftwards = carriage return, U+21B5 NEW
      {"lArr", "8656"}, // leftwards double arrow, U+21D0 ISOtech
      // ISO 10646 does not say that lArr is the same as the 'is implied by'
      // arrow but also does not have any other character for that function.
      // So ? lArr can be used for 'is implied by' as ISOtech suggests
      {"uArr", "8657"}, // upwards double arrow, U+21D1 ISOamsa
      {"rArr", "8658"}, // rightwards double arrow, U+21D2 ISOtech
      // ISO 10646 does not say this is the 'implies' character but does not
      // have another character with this function so ? rArr can be used for
      // 'implies' as ISOtech suggests
      {"dArr", "8659"}, // downwards double arrow, U+21D3 ISOamsa
      {"hArr", "8660"}, // left right double arrow, U+21D4 ISOamsa
      // Mathematical Operators
      {"forall", "8704"}, // for all, U+2200 ISOtech
      {"part", "8706"}, // partial differential, U+2202 ISOtech
      {"exist", "8707"}, // there exists, U+2203 ISOtech
      {"empty", "8709"}, // empty set = null set = diameter, U+2205 ISOamso
      {"nabla", "8711"}, // nabla = backward difference, U+2207 ISOtech
      {"isin", "8712"}, // element of, U+2208 ISOtech
      {"notin", "8713"}, // not an element of, U+2209 ISOtech
      {"ni", "8715"}, // contains as member, U+220B ISOtech
      // should there be a more memorable name than 'ni'?
      {"prod", "8719"}, // n-ary product = product sign, U+220F ISOamsb
      // prod is NOT the same character as U+03A0 'greek capital letter pi'
      // though the same glyph might be used for both
      {"sum", "8721"}, // n-ary summation, U+2211 ISOamsb
      // sum is NOT the same character as U+03A3 'greek capital letter sigma'
      // though the same glyph might be used for both
      {"minus", "8722"}, // minus sign, U+2212 ISOtech
      {"lowast", "8727"}, // asterisk operator, U+2217 ISOtech
      {"radic", "8730"}, // square root = radical sign, U+221A ISOtech
      {"prop", "8733"}, // proportional to, U+221D ISOtech
      {"infin", "8734"}, // infinity, U+221E ISOtech
      {"ang", "8736"}, // angle, U+2220 ISOamso
      {"and", "8743"}, // logical and = wedge, U+2227 ISOtech
      {"or", "8744"}, // logical or = vee, U+2228 ISOtech
      {"cap", "8745"}, // intersection = cap, U+2229 ISOtech
      {"cup", "8746"}, // union = cup, U+222A ISOtech
      {"int", "8747"}, // integral, U+222B ISOtech
      {"there4", "8756"}, // therefore, U+2234 ISOtech
      {"sim", "8764"}, // tilde operator = varies with = similar to, U+223C ISOtech
      // tilde operator is NOT the same character as the tilde, U+007E, although
      // the same glyph might be used to represent both
      {"cong", "8773"}, // approximately equal to, U+2245 ISOtech
      {"asymp", "8776"}, // almost equal to = asymptotic to, U+2248 ISOamsr
      {"ne", "8800"}, // not equal to, U+2260 ISOtech
      {"equiv", "8801"}, // identical to, U+2261 ISOtech
      {"le", "8804"}, // less-than or equal to, U+2264 ISOtech
      {"ge", "8805"}, // greater-than or equal to, U+2265 ISOtech
      {"sub", "8834"}, // subset of, U+2282 ISOtech
      {"sup", "8835"}, // superset of, U+2283 ISOtech
      // note that nsup, 'not a superset of, U+2283' is not covered by the
      // Symbol font encoding and is not included. Should it be, for symmetry?
      // It is in ISOamsn
      // nsub was previously swallowed into the comment above and therefore
      // missing from the table; restored here as a real entry.
      {"nsub", "8836"}, // not a subset of, U+2284 ISOamsn
      {"sube", "8838"}, // subset of or equal to, U+2286 ISOtech
      {"supe", "8839"}, // superset of or equal to, U+2287 ISOtech
      {"oplus", "8853"}, // circled plus = direct sum, U+2295 ISOamsb
      {"otimes", "8855"}, // circled times = vector product, U+2297 ISOamsb
      {"perp", "8869"}, // up tack = orthogonal to = perpendicular, U+22A5 ISOtech
      {"sdot", "8901"}, // dot operator, U+22C5 ISOamsb
      // dot operator is NOT the same character as U+00B7 middle dot
      // Miscellaneous Technical
      {"lceil", "8968"}, // left ceiling = apl upstile, U+2308 ISOamsc
      {"rceil", "8969"}, // right ceiling, U+2309 ISOamsc
      {"lfloor", "8970"}, // left floor = apl downstile, U+230A ISOamsc
      {"rfloor", "8971"}, // right floor, U+230B ISOamsc
      {"lang", "9001"}, // left-pointing angle bracket = bra, U+2329 ISOtech
      // lang is NOT the same character as U+003C 'less than' or U+2039
      // 'single left-pointing angle quotation mark'
      {"rang", "9002"}, // right-pointing angle bracket = ket, U+232A ISOtech
      // rang is NOT the same character as U+003E 'greater than' or U+203A
      // 'single right-pointing angle quotation mark'
      // Geometric Shapes
      {"loz", "9674"}, // lozenge, U+25CA ISOpub
      // Miscellaneous Symbols
      {"spades", "9824"}, // black spade suit, U+2660 ISOpub
      // black here seems to mean filled as opposed to hollow
      {"clubs", "9827"}, // black club suit = shamrock, U+2663 ISOpub
      {"hearts", "9829"}, // black heart suit = valentine, U+2665 ISOpub
      {"diams", "9830"}, // black diamond suit, U+2666 ISOpub
      // Latin Extended-A
      {"OElig", "338"}, // latin capital ligature OE, U+0152 ISOlat2
      {"oelig", "339"}, // latin small ligature oe, U+0153 ISOlat2
      // ligature is a misnomer, this is a separate character in some languages
      {"Scaron", "352"}, // latin capital letter S with caron, U+0160 ISOlat2
      {"scaron", "353"}, // latin small letter s with caron, U+0161 ISOlat2
      {"Yuml", "376"}, // latin capital letter Y with diaeresis, U+0178 ISOlat2
      // Spacing Modifier Letters
      {"circ", "710"}, // modifier letter circumflex accent, U+02C6 ISOpub
      {"tilde", "732"}, // small tilde, U+02DC ISOdia
      // General Punctuation
      {"ensp", "8194"}, // en space, U+2002 ISOpub
      {"emsp", "8195"}, // em space, U+2003 ISOpub
      {"thinsp", "8201"}, // thin space, U+2009 ISOpub
      {"zwnj", "8204"}, // zero width non-joiner, U+200C NEW RFC 2070
      {"zwj", "8205"}, // zero width joiner, U+200D NEW RFC 2070
      {"lrm", "8206"}, // left-to-right mark, U+200E NEW RFC 2070
      {"rlm", "8207"}, // right-to-left mark, U+200F NEW RFC 2070
      {"ndash", "8211"}, // en dash, U+2013 ISOpub
      {"mdash", "8212"}, // em dash, U+2014 ISOpub
      {"lsquo", "8216"}, // left single quotation mark, U+2018 ISOnum
      {"rsquo", "8217"}, // right single quotation mark, U+2019 ISOnum
      {"sbquo", "8218"}, // single low-9 quotation mark, U+201A NEW
      {"ldquo", "8220"}, // left double quotation mark, U+201C ISOnum
      {"rdquo", "8221"}, // right double quotation mark, U+201D ISOnum
      {"bdquo", "8222"}, // double low-9 quotation mark, U+201E NEW
      {"dagger", "8224"}, // dagger, U+2020 ISOpub
      {"Dagger", "8225"}, // double dagger, U+2021 ISOpub
      {"permil", "8240"}, // per mille sign, U+2030 ISOtech
      {"lsaquo", "8249"}, // single left-pointing angle quotation mark, U+2039 ISO proposed
      // lsaquo is proposed but not yet ISO standardized
      {"rsaquo", "8250"}, // single right-pointing angle quotation mark, U+203A ISO proposed
      // rsaquo is proposed but not yet ISO standardized
      {"euro", "8364"}, // euro sign, U+20AC NEW
  };

感谢各位的阅读,以上就是“html的转义字符然后通过代码识别”的内容了,经过本文的学习后,相信大家对html的转义字符然后通过代码识别这一问题有了更深刻的体会,具体使用情况还需要大家实践验证。这里是亿速云,小编将为大家推送更多相关知识点的文章,欢迎关注!

推荐阅读:
  1. 用 Python 处理 HTML 转义字符的5种方式
  2. HTML 转义字符

免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。

html

上一篇:python如何在一个类中定义多个构造函数

下一篇:python如何使用装饰器缓存函数调用

相关阅读

您好,登录后才能下订单哦!

密码登录
登录注册
其他方式登录
点击 登录注册 即表示同意《亿速云用户服务条款》