Plan 9 from Bell Labs’s /usr/web/sources/contrib/stallion/root/sys/lib/python2.7/HTMLParser.pyc

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


�`^c@sdZddlZddlZejd�Zejd�Zejd�Zejd�Zejd�Zejd�Z	ejd	�Z
ejd
�Zejd�Zejd�Z
ejd
ej�Zejd�Zejd�Zdefd��YZdejfd��YZdS(sA parser for HTML and XHTML.i�Ns[&<]s
&[a-zA-Z#]s%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]s)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]s	<[a-zA-Z]t>s--\s*>s$([a-zA-Z][^	

 />]*)(?:\s|/(?!>))*s[a-zA-Z][^	

 />]*s]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*s
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
s#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>tHTMLParseErrorcBs#eZdZdd�Zd�ZRS(s&Exception raised for all parse errors.cCs3|st�||_|d|_|d|_dS(Nii(tAssertionErrortmsgtlinenotoffset(tselfRtposition((s /sys/lib/python2.7/HTMLParser.pyt__init__<s	
cCsW|j}|jdk	r,|d|j}n|jdk	rS|d|jd}n|S(Ns, at line %ds, column %di(RRtNoneR(Rtresult((s /sys/lib/python2.7/HTMLParser.pyt__str__Bs	N(NN(t__name__t
__module__t__doc__R	RR(((s /sys/lib/python2.7/HTMLParser.pyR9st
HTMLParsercBs
eZdZdZd�Zd�Zd�Zd�Zd�ZdZ
d�Zd	�Zd
�Z
d�Zd�Zd
d�Zd�Zd�Zd�Zd�Zd�Zd�Zd�Zd�Zd�Zd�Zd�Zd�Zd�Zd�ZdZd�Z RS( s�Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  Entity references are
    passed by calling self.handle_entityref() with the entity
    reference as the argument.  Numeric character references are
    passed to self.handle_charref() with the string containing the
    reference as the argument.
    tscripttstylecCs|j�dS(s#Initialize and reset this instance.N(treset(R((s /sys/lib/python2.7/HTMLParser.pyRbscCs8d|_d|_t|_d|_tjj|�dS(s1Reset this instance.  Loses all unprocessed data.ts???N(	trawdatatlasttagtinteresting_normaltinterestingR	t
cdata_elemt
markupbaset
ParserBaseR(R((s /sys/lib/python2.7/HTMLParser.pyRfs
				cCs!|j||_|jd�dS(s�Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        iN(Rtgoahead(Rtdata((s /sys/lib/python2.7/HTMLParser.pytfeednscCs|jd�dS(sHandle any buffered data.iN(R(R((s /sys/lib/python2.7/HTMLParser.pytclosewscCst||j���dS(N(Rtgetpos(Rtmessage((s /sys/lib/python2.7/HTMLParser.pyterror{scCs|jS(s)Return full source of start tag: '<...>'.(t_HTMLParser__starttag_text(R((s /sys/lib/python2.7/HTMLParser.pytget_starttag_text�scCs2|j�|_tjd|jtj�|_dS(Ns</\s*%s\s*>(tlowerRtretcompiletIR(Rtelem((s /sys/lib/python2.7/HTMLParser.pytset_cdata_mode�scCst|_d|_dS(N(RRR	R(R((s /sys/lib/python2.7/HTMLParser.pytclear_cdata_mode�s	c
Cs||j}d}t|�}x||kr%|jj||�}|rT|j�}n|jraPn|}||kr�|j|||!�n|j||�}||kr�Pn|j}|d|�r7t	j
||�r�j|�}n�|d|�r	|j|�}n�|d|�r*|j
|�}nm|d|�rK|j|�}nL|d|�rl|j|�}n+|d|kr�|jd�|d}nP|dkr"|s�Pn|jd|d�}|dkr|jd|d�}|dkr|d}qn
|d7}|j|||!�n|j||�}q|d	|�rtj
||�}|r�|j�d
d!}	|j|	�|j�}|d|d�s�|d}n|j||�}qq"d||kr|j|||d
!�|j||d
�}nPq|d
|�rtj
||�}|r�|jd�}	|j|	�|j�}|d|d�sv|d}n|j||�}qntj
||�}|r�|r�|j�||kr�|jd�nPq"|d|kr|jd
�|j||d�}q"Pqdstd��qW|rk||krk|jrk|j|||!�|j||�}n|||_dS(Nit<s</s<!--s<?s<!iRs&#ii�t;t&s#EOF in middle of entity or char refsinteresting.search() lied(RtlenRtsearchtstartRthandle_datat	updatepost
startswithtstarttagopentmatchtparse_starttagtparse_endtagt
parse_commenttparse_pitparse_html_declarationtfindtcharreftgroupthandle_charreftendt	entityrefthandle_entityreft
incompleteR!R(
RR?RtitnR5tjR3tktname((s /sys/lib/python2.7/HTMLParser.pyR�s�			







cCs�|j}|||d!dkr0|jd�n|||d!dkrT|j|�S|||d!dkrx|j|�S|||d!j�d	kr�|jd
|d�}|dkr�dS|j||d|!�|dS|j|�SdS(
Nis<!s+unexpected call to parse_html_declaration()is<!--is<![i	s	<!doctypeRi�i(RR!R8tparse_marked_sectionR$R;thandle_decltparse_bogus_comment(RRCRtgtpos((s /sys/lib/python2.7/HTMLParser.pyR:�s	

icCs|j}|||d!dkr0|jd�n|jd|d�}|dkrVdS|rw|j||d|!�n|dS(	Nis<!s</s"unexpected call to parse_comment()Ri�i(s<!s</(RR!R;thandle_comment(RRCtreportRtpos((s /sys/lib/python2.7/HTMLParser.pyRJs	cCs�|j}|||d!dks,td��tj||d�}|sLdS|j�}|j||d|!�|j�}|S(Nis<?sunexpected call to parse_pi()i�(RRtpicloseR/R0t	handle_piR?(RRCRR5RE((s /sys/lib/python2.7/HTMLParser.pyR9s	#cCs�d|_|j|�}|dkr(|S|j}|||!|_g}tj||d�}|sotd��|j�}|jd�j	�|_
}x�|kr�tj||�}|s�Pn|jddd�\}	}
}|
s�}nX|d dko|dkns7|d dko2|dknrG|dd!}n|r_|j|�}n|j
|	j	�|f�|j�}q�W|||!j�}|dkr+|j�\}
}d|jkr|
|jjd�}
t|j�|jjd�}n|t|j�}|j|||!�|S|jd
�rM|j||�n/|j||�||jkr||j|�n|S(
Niis#unexpected call to parse_starttag()iis'i�t"Rs/>s
(Rs/>(R	R"tcheck_for_whole_start_tagRttagfindR5RR?R=R$RtattrfindtunescapetappendtstripRtcountR.trfindR1tendswiththandle_startendtagthandle_starttagtCDATA_CONTENT_ELEMENTSR)(RRCtendposRtattrsR5RFttagtmtattrnametrestt	attrvalueR?RR((s /sys/lib/python2.7/HTMLParser.pyR6sR			$$cCs�|j}tj||�}|r�j�}|||d!}|dkrR|dS|dkr�|jd|�rx|dS|jd|�r�dS|j||d�|jd�n|dkr�dS|d	kr�dS||kr�S|dSntd
��dS(NiRt/s/>ii�smalformed empty start tagRs6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZswe should not get here!(RtlocatestarttagendR5R?R3R2R!R(RRCRRaREtnext((s /sys/lib/python2.7/HTMLParser.pyRRNs,	cCs�|j}|||d!dks,td��tj||d�}|sLdS|j�}tj||�}|s$|jdk	r�|j	|||!�|St
j||d�}|s�||d!dkr�|dS|j|�Sn|jd�j
�}|jd|j��}|j|�|dS|jd�j
�}|jdk	rr||jkrr|j	|||!�|Sn|j|�|j�|S(	Nis</sunexpected call to parse_endtagii�is</>R(RRt	endendtagR/R?t
endtagfindR5RR	R1RSRJR=R$R;t
handle_endtagR*(RRCRR5RKt	namematchttagnameR(((s /sys/lib/python2.7/HTMLParser.pyR7ns8	#


cCs!|j||�|j|�dS(N(R\Rj(RR`R_((s /sys/lib/python2.7/HTMLParser.pyR[�scCsdS(N((RR`R_((s /sys/lib/python2.7/HTMLParser.pyR\�scCsdS(N((RR`((s /sys/lib/python2.7/HTMLParser.pyRj�scCsdS(N((RRG((s /sys/lib/python2.7/HTMLParser.pyR>�scCsdS(N((RRG((s /sys/lib/python2.7/HTMLParser.pyRA�scCsdS(N((RR((s /sys/lib/python2.7/HTMLParser.pyR1�scCsdS(N((RR((s /sys/lib/python2.7/HTMLParser.pyRL�scCsdS(N((Rtdecl((s /sys/lib/python2.7/HTMLParser.pyRI�scCsdS(N((RR((s /sys/lib/python2.7/HTMLParser.pyRP�scCsdS(N((RR((s /sys/lib/python2.7/HTMLParser.pytunknown_decl�scs2d|kr|S�fd�}tjd||�S(NR-cs|j�d}yZ|ddkri|d}|dd
krSt|dd�}nt|�}t|�SWntk
r�d|dSXd	dl}tjdkr�d
d6}t_x0|jj	�D]\}}t|�||<q�Wny�j|SWnt
k
rd|dSXdS(Nit#itxtXis&#R,i�u'taposR-(RpRq(tgroupstinttunichrt
ValueErrorthtmlentitydefsRt
entitydefsR	tname2codepointt	iteritemstKeyError(tstcRwRxRFtv(R(s /sys/lib/python2.7/HTMLParser.pytreplaceEntities�s&



s#&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));(R%tsub(RR|R((Rs /sys/lib/python2.7/HTMLParser.pyRU�s(RRN(!RR
RR]RRRRR!R	R"R#R)R*RR:RJR9R6RRR7R[R\RjR>RAR1RLRIRPRnRxRU(((s /sys/lib/python2.7/HTMLParser.pyRKs<										^			4	 	(										(RRR%R&RRBR@R<R4ROtcommentcloseRSttagfind_tolerantRTtVERBOSERfRhRit	ExceptionRRR(((s /sys/lib/python2.7/HTMLParser.pyt<module>s&
	


Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].