D: [iurt_root_command] chroot
Installing /home/iurt/rpmbuild/SRPMS/python-beautifulsoup4-4.8.1-1.mga8.src.rpm
Building target platforms: aarch64
Building for target aarch64
Executing(%prep): /bin/sh -e /home/iurt/rpmbuild/tmp/rpm-tmp.O0V2sd
+ umask 022
+ cd /home/iurt/rpmbuild/BUILD
+ '[' 1 -eq 1 ']'
+ '[' 1 -eq 1 ']'
+ '[' 1 -eq 1 ']'
+ cd /home/iurt/rpmbuild/BUILD
+ rm -rf beautifulsoup4-4.8.1
+ /usr/bin/gzip -dc /home/iurt/rpmbuild/SOURCES/beautifulsoup4-4.8.1.tar.gz
+ /usr/bin/tar -xof -
+ STATUS=0
+ '[' 0 -ne 0 ']'
+ cd beautifulsoup4-4.8.1
+ /usr/bin/chmod -Rf a+rX,u+w,g-w,o-w .
+ RPM_EC=0
++ jobs -p
+ exit 0
Executing(%build): /bin/sh -e /home/iurt/rpmbuild/tmp/rpm-tmp.dov0xc
+ umask 022
+ cd /home/iurt/rpmbuild/BUILD
+ cd beautifulsoup4-4.8.1
+ '[' 1 -eq 1 ']'
+ '[' 1 -eq 1 ']'
+ 2to3 --write --nobackups .
RefactoringTool: Skipping optional fixer: buffer
RefactoringTool: Skipping optional fixer: idioms
RefactoringTool: Skipping optional fixer: set_literal
RefactoringTool: Skipping optional fixer: ws_comma
RefactoringTool: No changes to ./setup.py
RefactoringTool: Refactored ./bs4/__init__.py
RefactoringTool: No changes to ./bs4/check_block.py
RefactoringTool: Refactored ./bs4/dammit.py
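The RefactoringTool output below comes from the stdlib lib2to3 package; the `2to3` console script invoked above is a thin wrapper around it. For reference, a minimal sketch of the equivalent programmatic call (assuming the Python 3.8 toolchain of this build, where lib2to3 still ships in the standard library):

    # Sketch: programmatic equivalent of `2to3 --write --nobackups .`
    from lib2to3.main import main

    # Rewrites files in place and prints the same RefactoringTool diffs.
    exit_code = main("lib2to3.fixes", args=["--write", "--nobackups", "."])
    raise SystemExit(exit_code)
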
--- ./bs4/__init__.py (original)
+++ ./bs4/__init__.py (refactored)
@@ -49,7 +49,7 @@
# The very first thing we do is give a useful error if someone is
# running this code under Python 3 without converting it.
-'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
+'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
class BeautifulSoup(Tag):
"""
@@ -73,7 +73,7 @@
like HTML's <br> tag), call handle_starttag and then
handle_endtag.
"""
- ROOT_TAG_NAME = u'[document]'
+ ROOT_TAG_NAME = '[document]'
# If the end-user gives no indication which tree builder they
# want, look for one with these features.
@@ -187,7 +187,7 @@
from_encoding = from_encoding or deprecated_argument(
"fromEncoding", "from_encoding")
- if from_encoding and isinstance(markup, unicode):
+ if from_encoding and isinstance(markup, str):
warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
from_encoding = None
@@ -204,7 +204,7 @@
builder_class = builder
builder = None
elif builder is None:
- if isinstance(features, basestring):
+ if isinstance(features, str):
features = [features]
if features is None or len(features) == 0:
features = self.DEFAULT_BUILDER_FEATURES
@@ -274,13 +274,13 @@
markup = markup.read()
elif len(markup) <= 256 and (
(isinstance(markup, bytes) and not b'<' in markup)
- or (isinstance(markup, unicode) and not u'<' in markup)
+ or (isinstance(markup, str) and not '<' in markup)
):
# Print out warnings for a couple beginner problems
# involving passing non-markup to Beautiful Soup.
# Beautiful Soup will still parse the input as markup,
# just in case that's what the user really wants.
- if (isinstance(markup, unicode)
+ if (isinstance(markup, str)
and not os.path.supports_unicode_filenames):
possible_filename = markup.encode("utf8")
else:
@@ -288,13 +288,13 @@
is_file = False
try:
is_file = os.path.exists(possible_filename)
- except Exception, e:
+ except Exception as e:
# This is almost certainly a problem involving
# characters not valid in filenames on this
# system. Just let it go.
pass
if is_file:
- if isinstance(markup, unicode):
+ if isinstance(markup, str):
markup = markup.encode("utf8")
warnings.warn(
'"%s" looks like a filename, not markup. You should'
@@ -318,9 +318,9 @@
pass
if not success:
- other_exceptions = [unicode(e) for e in rejections]
+ other_exceptions = [str(e) for e in rejections]
raise ParserRejectedMarkup(
- u"The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions)
+ "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions)
)
# Clear out the markup and remove the builder's circular
@@ -356,9 +356,9 @@
if isinstance(markup, bytes):
space = b' '
cant_start_with = (b"http:", b"https:")
- elif isinstance(markup, unicode):
- space = u' '
- cant_start_with = (u"http:", u"https:")
+ elif isinstance(markup, str):
+ space = ' '
+ cant_start_with = ("http:", "https:")
else:
return
@@ -447,7 +447,7 @@
)
if self.current_data:
- current_data = u''.join(self.current_data)
+ current_data = ''.join(self.current_data)
# If whitespace is not preserved, and this string contains
# nothing but ASCII spaces, replace it with a single space
# or newline.
@@ -616,9 +616,9 @@
encoding_part = ''
if eventual_encoding != None:
encoding_part = ' encoding="%s"' % eventual_encoding
- prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
+ prefix = '<?xml version="1.0"%s?>\n' % encoding_part
else:
- prefix = u''
+ prefix = ''
if not pretty_print:
indent_level = None
else:
@@ -652,4 +652,4 @@
if __name__ == '__main__':
import sys
soup = BeautifulSoup(sys.stdin)
- print soup.prettify()
+ print(soup.prettify())
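The hunks above exercise the core fixers: `print` statements become function calls, `except E, e` becomes `except E as e`, and the Python 2 `unicode`/`basestring` types collapse into `str`. A small standalone illustration of the converted idioms (illustrative values, not bs4 code):

    markup = "Sacr\xe9 bleu!"          # str is the one text type; unicode is gone
    assert isinstance(markup, str)     # was: isinstance(markup, basestring)
    try:
        markup.encode("ascii")
    except UnicodeEncodeError as e:    # was: except UnicodeEncodeError, e
        print("cannot encode: %s" % e)
    print(markup)                      # was: print markup
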
--- ./bs4/dammit.py (original)
+++ ./bs4/dammit.py (refactored)
@@ -10,7 +10,7 @@
__license__ = "MIT"
import codecs
-from htmlentitydefs import codepoint2name
+from html.entities import codepoint2name
import re
import logging
import string
@@ -22,7 +22,7 @@
# PyPI package: cchardet
import cchardet
def chardet_dammit(s):
- if isinstance(s, unicode):
+ if isinstance(s, str):
return None
return cchardet.detect(s)['encoding']
except ImportError:
@@ -32,7 +32,7 @@
# PyPI package: chardet
import chardet
def chardet_dammit(s):
- if isinstance(s, unicode):
+ if isinstance(s, str):
return None
return chardet.detect(s)['encoding']
#import chardet.constants
@@ -50,14 +50,14 @@
# Build bytestring and Unicode versions of regular expressions for finding
# a declared encoding inside an XML or HTML document.
-xml_encoding = u'^\s*<\\?.*encoding=[\'"](.*?)[\'"].*\\?>'
-html_meta = u'<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]'
+xml_encoding = '^\s*<\\?.*encoding=[\'"](.*?)[\'"].*\\?>'
+html_meta = '<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]'
encoding_res = dict()
encoding_res[bytes] = {
'html' : re.compile(html_meta.encode("ascii"), re.I),
'xml' : re.compile(xml_encoding.encode("ascii"), re.I),
}
-encoding_res[unicode] = {
+encoding_res[str] = {
'html' : re.compile(html_meta, re.I),
'xml' : re.compile(xml_encoding, re.I)
}
@@ -78,7 +78,7 @@
# entities, but that's a little tricky.
extra = [(39, 'apos')]
for codepoint, name in list(codepoint2name.items()) + extra:
- character = unichr(codepoint)
+ character = chr(codepoint)
if codepoint not in (34, 39):
# There's no point in turning the quotation mark into
# " or the single quote into ', unless it
@@ -295,7 +295,7 @@
def strip_byte_order_mark(cls, data):
"""If a byte-order mark is present, strip it and return the encoding it implies."""
encoding = None
- if isinstance(data, unicode):
+ if isinstance(data, str):
# Unicode data cannot have a byte-order mark.
return data, encoding
if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
@@ -335,7 +335,7 @@
if isinstance(markup, bytes):
res = encoding_res[bytes]
else:
- res = encoding_res[unicode]
+ res = encoding_res[str]
xml_re = res['xml']
html_re = res['html']
@@ -381,9 +381,9 @@
markup, override_encodings, is_html, exclude_encodings)
# Short-circuit if the data is in Unicode to begin with.
- if isinstance(markup, unicode) or markup == '':
+ if isinstance(markup, str) or markup == '':
RefactoringTool: Refactored ./bs4/diagnose.py
RefactoringTool: Refactored ./bs4/element.py
self.markup = markup
- self.unicode_markup = unicode(markup)
+ self.unicode_markup = str(markup)
self.original_encoding = None
return
@@ -467,7 +467,7 @@
def _to_unicode(self, data, encoding, errors="strict"):
'''Given a string and its encoding, decodes the string into Unicode.
%encoding is a string recognized by encodings.aliases'''
- return unicode(data, encoding, errors)
+ return str(data, encoding, errors)
@property
def declared_html_encoding(self):
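dammit.py mostly needed the `htmlentitydefs` → `html.entities` rename and the `unichr` → `chr` merge. A sketch of the usual compatibility pattern for the renamed module (bs4 itself relies on the build-time 2to3 rewrite instead of a runtime guard):

    try:
        from html.entities import codepoint2name   # Python 3
    except ImportError:
        from htmlentitydefs import codepoint2name  # Python 2

    print(codepoint2name[0xE9])  # 'eacute' -- the table used for entity substitution
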
--- ./bs4/diagnose.py (original)
+++ ./bs4/diagnose.py (refactored)
@@ -4,8 +4,8 @@
__license__ = "MIT"
import cProfile
-from StringIO import StringIO
-from HTMLParser import HTMLParser
+from io import StringIO
+from html.parser import HTMLParser
import bs4
from bs4 import BeautifulSoup, __version__
from bs4.builder import builder_registry
@@ -21,8 +21,8 @@
def diagnose(data):
"""Diagnostic suite for isolating common problems."""
- print "Diagnostic running on Beautiful Soup %s" % __version__
- print "Python version %s" % sys.version
+ print("Diagnostic running on Beautiful Soup %s" % __version__)
+ print("Python version %s" % sys.version)
basic_parsers = ["html.parser", "html5lib", "lxml"]
for name in basic_parsers:
@@ -31,16 +31,16 @@
break
else:
basic_parsers.remove(name)
- print (
+ print((
"I noticed that %s is not installed. Installing it may help." %
- name)
+ name))
if 'lxml' in basic_parsers:
basic_parsers.append("lxml-xml")
try:
from lxml import etree
- print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
- except ImportError, e:
+ print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))
+ except ImportError as e:
print (
"lxml is not installed or couldn't be imported.")
@@ -48,43 +48,43 @@
if 'html5lib' in basic_parsers:
try:
import html5lib
- print "Found html5lib version %s" % html5lib.__version__
- except ImportError, e:
+ print("Found html5lib version %s" % html5lib.__version__)
+ except ImportError as e:
print (
"html5lib is not installed or couldn't be imported.")
if hasattr(data, 'read'):
data = data.read()
elif data.startswith("http:") or data.startswith("https:"):
- print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
- print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
+ print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
+ print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
return
else:
try:
if os.path.exists(data):
- print '"%s" looks like a filename. Reading data from the file.' % data
+ print('"%s" looks like a filename. Reading data from the file.' % data)
with open(data) as fp:
data = fp.read()
except ValueError:
# This can happen on some platforms when the 'filename' is
# too long. Assume it's data and not a filename.
pass
- print
+ print()
for parser in basic_parsers:
- print "Trying to parse your markup with %s" % parser
+ print("Trying to parse your markup with %s" % parser)
success = False
try:
soup = BeautifulSoup(data, features=parser)
success = True
- except Exception, e:
- print "%s could not parse the markup." % parser
+ except Exception as e:
+ print("%s could not parse the markup." % parser)
traceback.print_exc()
if success:
- print "Here's what %s did with the markup:" % parser
- print soup.prettify()
-
- print "-" * 80
+ print("Here's what %s did with the markup:" % parser)
+ print(soup.prettify())
+
+ print("-" * 80)
def lxml_trace(data, html=True, **kwargs):
"""Print out the lxml events that occur during parsing.
@@ -94,7 +94,7 @@
"""
from lxml import etree
for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
- print("%s, %4s, %s" % (event, element.tag, element.text))
+ print(("%s, %4s, %s" % (event, element.tag, element.text)))
class AnnouncingParser(HTMLParser):
"""Announces HTMLParser parse events, without doing anything else."""
@@ -176,9 +176,9 @@
def benchmark_parsers(num_elements=100000):
"""Very basic head-to-head performance benchmark."""
- print "Comparative parser benchmark on Beautiful Soup %s" % __version__
+ print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
data = rdoc(num_elements)
- print "Generated a large invalid HTML document (%d bytes)." % len(data)
+ print("Generated a large invalid HTML document (%d bytes)." % len(data))
for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
success = False
@@ -187,24 +187,24 @@
soup = BeautifulSoup(data, parser)
b = time.time()
success = True
- except Exception, e:
- print "%s could not parse the markup." % parser
+ except Exception as e:
+ print("%s could not parse the markup." % parser)
traceback.print_exc()
if success:
- print "BS4+%s parsed the markup in %.2fs." % (parser, b-a)
+ print("BS4+%s parsed the markup in %.2fs." % (parser, b-a))
from lxml import etree
a = time.time()
etree.HTML(data)
b = time.time()
- print "Raw lxml parsed the markup in %.2fs." % (b-a)
+ print("Raw lxml parsed the markup in %.2fs." % (b-a))
import html5lib
parser = html5lib.HTMLParser()
a = time.time()
parser.parse(data)
b = time.time()
- print "Raw html5lib parsed the markup in %.2fs." % (b-a)
+ print("Raw html5lib parsed the markup in %.2fs." % (b-a))
def profile(num_elements=100000, parser="lxml"):
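diagnose.py is almost entirely print statements, so it shows the `print` fixer at work alongside the `StringIO` and `HTMLParser` module renames. The Python 3 homes of those modules, as a runnable sketch:

    from io import StringIO             # was: from StringIO import StringIO
    from html.parser import HTMLParser  # was: from HTMLParser import HTMLParser

    parser = HTMLParser()
    parser.feed(StringIO("<p>hello</p>").read())  # feed() takes str data
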
--- ./bs4/element.py (original)
+++ ./bs4/element.py (refactored)
@@ -3,14 +3,14 @@
try:
from collections.abc import Callable # Python 3.6
-except ImportError , e:
+except ImportError as e:
from collections import Callable
import re
import sys
import warnings
try:
import soupsieve
-except ImportError, e:
+except ImportError as e:
soupsieve = None
warnings.warn(
'The soupsieve package is not installed. CSS selectors cannot be used.'
@@ -43,7 +43,7 @@
return alias
-class NamespacedAttribute(unicode):
+class NamespacedAttribute(str):
def __new__(cls, prefix, name=None, namespace=None):
if not name:
@@ -52,18 +52,18 @@
name = None
if name is None:
- obj = unicode.__new__(cls, prefix)
+ obj = str.__new__(cls, prefix)
elif prefix is None:
# Not really namespaced.
- obj = unicode.__new__(cls, name)
- else:
- obj = unicode.__new__(cls, prefix + ":" + name)
+ obj = str.__new__(cls, name)
+ else:
+ obj = str.__new__(cls, prefix + ":" + name)
obj.prefix = prefix
obj.name = name
obj.namespace = namespace
return obj
-class AttributeValueWithCharsetSubstitution(unicode):
+class AttributeValueWithCharsetSubstitution(str):
"""A stand-in object for a character encoding specified in HTML."""
class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
@@ -74,7 +74,7 @@
"""
def __new__(cls, original_value):
- obj = unicode.__new__(cls, original_value)
+ obj = str.__new__(cls, original_value)
obj.original_value = original_value
return obj
@@ -97,9 +97,9 @@
match = cls.CHARSET_RE.search(original_value)
RefactoringTool: Refactored ./bs4/formatter.py
RefactoringTool: Refactored ./bs4/testing.py
if match is None:
# No substitution necessary.
- return unicode.__new__(unicode, original_value)
-
- obj = unicode.__new__(cls, original_value)
+ return str.__new__(str, original_value)
+
+ obj = str.__new__(cls, original_value)
obj.original_value = original_value
return obj
@@ -276,7 +276,7 @@
raise ValueError("Cannot insert None into a tag.")
if new_child is self:
raise ValueError("Cannot insert a tag into itself.")
- if (isinstance(new_child, basestring)
+ if (isinstance(new_child, str)
and not isinstance(new_child, NavigableString)):
new_child = NavigableString(new_child)
@@ -516,7 +516,7 @@
result = (element for element in generator
if isinstance(element, Tag))
return ResultSet(strainer, result)
- elif isinstance(name, basestring):
+ elif isinstance(name, str):
# Optimization to find all tags with a given name.
if name.count(':') == 1:
# This is a name with a prefix. If this is a namespace-aware document,
@@ -605,7 +605,7 @@
return self.parents
-class NavigableString(unicode, PageElement):
+class NavigableString(str, PageElement):
PREFIX = ''
SUFFIX = ''
@@ -623,10 +623,10 @@
passed in to the superclass's __new__ or the superclass won't know
how to handle non-ASCII characters.
"""
- if isinstance(value, unicode):
- u = unicode.__new__(cls, value)
- else:
- u = unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
+ if isinstance(value, str):
+ u = str.__new__(cls, value)
+ else:
+ u = str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
u.setup()
return u
@@ -637,7 +637,7 @@
return type(self)(self)
def __getnewargs__(self):
- return (unicode(self),)
+ return (str(self),)
def __getattr__(self, attr):
"""text.string gives you text. This is for backwards
@@ -680,29 +680,29 @@
class CData(PreformattedString):
- PREFIX = u'<![CDATA['
- SUFFIX = u']]>'
+ PREFIX = '<![CDATA['
+ SUFFIX = ']]>'
class ProcessingInstruction(PreformattedString):
"""A SGML processing instruction."""
- PREFIX = u'<?'
- SUFFIX = u'>'
+ PREFIX = '<?'
+ SUFFIX = '>'
class XMLProcessingInstruction(ProcessingInstruction):
"""An XML processing instruction."""
- PREFIX = u'<?'
- SUFFIX = u'?>'
+ PREFIX = '<?'
+ SUFFIX = '?>'
class Comment(PreformattedString):
- PREFIX = u'<!--'
- SUFFIX = u'-->'
+ PREFIX = '<!--'
+ SUFFIX = '-->'
class Declaration(PreformattedString):
- PREFIX = u'<?'
- SUFFIX = u'?>'
+ PREFIX = '<?'
+ SUFFIX = '?>'
class Doctype(PreformattedString):
@@ -719,8 +719,8 @@
return Doctype(value)
- PREFIX = u'<!DOCTYPE '
- SUFFIX = u'>\n'
+ PREFIX = '<!DOCTYPE '
+ SUFFIX = '>\n'
class Tag(PageElement):
@@ -885,7 +885,7 @@
for string in self._all_strings(True):
yield string
- def get_text(self, separator=u"", strip=False,
+ def get_text(self, separator="", strip=False,
types=(NavigableString, CData)):
"""
Get all child strings, concatenated using the given separator.
@@ -1001,7 +1001,7 @@
def __contains__(self, x):
return x in self.contents
- def __nonzero__(self):
+ def __bool__(self):
"A tag is non-None even if it has no contents."
return True
@@ -1116,8 +1116,8 @@
else:
if isinstance(val, list) or isinstance(val, tuple):
val = ' '.join(val)
- elif not isinstance(val, basestring):
- val = unicode(val)
+ elif not isinstance(val, str):
+ val = str(val)
elif (
isinstance(val, AttributeValueWithCharsetSubstitution)
and eventual_encoding is not None
@@ -1126,7 +1126,7 @@
text = formatter.attribute_value(val)
decoded = (
- unicode(key) + '='
+ str(key) + '='
+ formatter.quoted_attribute_value(text))
attrs.append(decoded)
close = ''
@@ -1397,7 +1397,7 @@
else:
attrs = kwargs
normalized_attrs = {}
- for key, value in attrs.items():
+ for key, value in list(attrs.items()):
normalized_attrs[key] = self._normalize_search_value(value)
self.attrs = normalized_attrs
@@ -1406,7 +1406,7 @@
def _normalize_search_value(self, value):
# Leave it alone if it's a Unicode string, a callable, a
# regular expression, a boolean, or None.
- if (isinstance(value, unicode) or isinstance(value, Callable) or hasattr(value, 'match')
+ if (isinstance(value, str) or isinstance(value, Callable) or hasattr(value, 'match')
or isinstance(value, bool) or value is None):
return value
@@ -1419,7 +1419,7 @@
new_value = []
for v in value:
if (hasattr(v, '__iter__') and not isinstance(v, bytes)
- and not isinstance(v, unicode)):
+ and not isinstance(v, str)):
# This is almost certainly the user's mistake. In the
# interests of avoiding infinite loops, we'll let
# it through as-is rather than doing a recursive call.
@@ -1431,7 +1431,7 @@
# Otherwise, convert it into a Unicode string.
# The unicode(str()) thing is so this will do the same thing on Python 2
# and Python 3.
- return unicode(str(value))
+ return str(str(value))
def __str__(self):
if self.text:
@@ -1485,7 +1485,7 @@
found = None
# If given a list of items, scan it for a text element that
# matches.
- if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, basestring)):
+ if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, str)):
for element in markup:
if isinstance(element, NavigableString) \
and self.search(element):
@@ -1498,7 +1498,7 @@
found = self.search_tag(markup)
# If it's text, make sure the text matches.
elif isinstance(markup, NavigableString) or \
- isinstance(markup, basestring):
+ isinstance(markup, str):
if not self.name and not self.attrs and self._matches(markup, self.text):
found = markup
else:
@@ -1543,7 +1543,7 @@
return not match_against
if (hasattr(match_against, '__iter__')
- and not isinstance(match_against, basestring)):
+ and not isinstance(match_against, str)):
# We're asked to match against an iterable of items.
# The markup must be match at least one item in the
# iterable. We'll try each one in turn.
@@ -1570,7 +1570,7 @@
# the tag's name and once against its prefixed name.
match = False
- if not match and isinstance(match_against, unicode):
+ if not match and isinstance(match_against, str):
# Exact string match
match = markup == match_against
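element.py carries the subtler conversions: `NavigableString` and `NamespacedAttribute` now subclass `str` instead of `unicode`, and the truthiness hook `__nonzero__` becomes `__bool__`. A minimal sketch of that last rename (hypothetical class, not the real Tag):

    # Python 2 consults __nonzero__, Python 3 consults __bool__.
    class TagLike:
        def __bool__(self):
            "An instance is truthy even if it has no contents."
            return True
        __nonzero__ = __bool__  # alias keeps Python 2 behaviour identical

    assert bool(TagLike()) is True
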
--- ./bs4/formatter.py (original)
+++ ./bs4/formatter.py (refactored)
@@ -44,7 +44,7 @@
"""Process a string that needs to undergo entity substitution."""
if not self.entity_substitution:
return ns
- from element import NavigableString
+ from .element import NavigableString
if (isinstance(ns, NavigableString)
and ns.parent is not None
and ns.parent.name in self.cdata_containing_tags):
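formatter.py needed only the implicit-relative-import fix: a bare `from element import ...` stopped resolving inside packages on Python 3, so the fixer rewrites it to the explicit relative form. The equivalent absolute spelling, runnable anywhere bs4 is importable:

    from bs4.element import NavigableString  # what `.element` resolves to here

    print(NavigableString("text"))  # a str subclass; prints: text
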
--- ./bs4/testing.py (original)
+++ ./bs4/testing.py (refactored)
@@ -22,7 +22,7 @@
from bs4.builder import HTMLParserTreeBuilder
RefactoringTool: Refactored ./bs4/builder/__init__.py
default_builder = HTMLParserTreeBuilder
-BAD_DOCUMENT = u"""A bare string
+BAD_DOCUMENT = """A bare string
@@ -315,7 +315,7 @@
# process_markup correctly sets processing_instruction_class
# even when the markup is already Unicode and there is no
# need to process anything.
- markup = u"""<?PITarget PIContent?>"""
+ markup = """<?PITarget PIContent?>"""
soup = self.soup(markup)
self.assertEqual(markup, soup.decode())
@@ -487,14 +487,14 @@
# "&T" and "&p" look like incomplete character entities, but they are
# not.
self.assertSoupEquals(
- u"• AT&T is in the s&p 500
",
- u"\u2022 AT&T is in the s&p 500
"
+ "• AT&T is in the s&p 500
",
+ "\u2022 AT&T is in the s&p 500
"
)
def test_apos_entity(self):
self.assertSoupEquals(
- u"Bob's Bar
",
- u"Bob's Bar
",
+ "Bob's Bar
",
+ "Bob's Bar
",
)
def test_entities_in_foreign_document_encoding(self):
@@ -507,17 +507,17 @@
# characters.
markup = "Hello -☃
"
soup = self.soup(markup)
- self.assertEquals(u"“Hello” -☃", soup.p.string)
+ self.assertEqual("“Hello” -☃", soup.p.string)
def test_entities_in_attributes_converted_to_unicode(self):
- expect = u''
+ expect = ''
self.assertSoupEquals('', expect)
self.assertSoupEquals('', expect)
self.assertSoupEquals('', expect)
self.assertSoupEquals('', expect)
def test_entities_in_text_converted_to_unicode(self):
- expect = u'pi\N{LATIN SMALL LETTER N WITH TILDE}ata
'
+ expect = 'pi\N{LATIN SMALL LETTER N WITH TILDE}ata
'
self.assertSoupEquals("piñata
", expect)
self.assertSoupEquals("piñata
", expect)
self.assertSoupEquals("piñata
", expect)
@@ -528,7 +528,7 @@
'I said "good day!"
')
def test_out_of_range_entity(self):
- expect = u"\N{REPLACEMENT CHARACTER}"
+ expect = "\N{REPLACEMENT CHARACTER}"
self.assertSoupEquals("", expect)
self.assertSoupEquals("", expect)
self.assertSoupEquals("", expect)
@@ -606,9 +606,9 @@
# A seemingly innocuous document... but it's in Unicode! And
# it contains characters that can't be represented in the
# encoding found in the declaration! The horror!
- markup = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!'
- soup = self.soup(markup)
- self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string)
+ markup = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!'
+ soup = self.soup(markup)
+ self.assertEqual('Sacr\xe9 bleu!', soup.body.string)
def test_soupstrainer(self):
"""Parsers should be able to work with SoupStrainers."""
@@ -648,7 +648,7 @@
# Both XML and HTML entities are converted to Unicode characters
# during parsing.
text = "<<sacré bleu!>>
"
- expected = u"<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>
"
+ expected = "<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>
"
self.assertSoupEquals(text, expected)
def test_smart_quotes_converted_on_the_way_in(self):
@@ -658,15 +658,15 @@
soup = self.soup(quote)
self.assertEqual(
soup.p.string,
- u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
+ "\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
def test_non_breaking_spaces_converted_on_the_way_in(self):
soup = self.soup(" ")
- self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
+ self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2)
def test_entities_converted_on_the_way_out(self):
text = "<<sacré bleu!>>
"
- expected = u"<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>
".encode("utf-8")
+ expected = "<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>
".encode("utf-8")
soup = self.soup(text)
self.assertEqual(soup.p.encode("utf-8"), expected)
@@ -675,7 +675,7 @@
# easy-to-understand document.
# Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
- unicode_html = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!
'
+ unicode_html = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!
'
# That's because we're going to encode it into ISO-Latin-1, and use
# that to test.
@@ -853,15 +853,15 @@
self.assertTrue(b"< < hey > >" in encoded)
def test_can_parse_unicode_document(self):
- markup = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!'
- soup = self.soup(markup)
- self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string)
+ markup = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!'
+ soup = self.soup(markup)
+ self.assertEqual('Sacr\xe9 bleu!', soup.root.string)
def test_popping_namespaced_tag(self):
markup = 'b2012-07-02T20:33:42Zcd'
soup = self.soup(markup)
self.assertEqual(
- unicode(soup.rss), markup)
+ str(soup.rss), markup)
def test_docstring_includes_correct_encoding(self):
soup = self.soup("")
@@ -892,17 +892,17 @@
def test_closing_namespaced_tag(self):
markup = '20010504
'
soup = self.soup(markup)
- self.assertEqual(unicode(soup.p), markup)
+ self.assertEqual(str(soup.p), markup)
def test_namespaced_attributes(self):
markup = ''
soup = self.soup(markup)
- self.assertEqual(unicode(soup.foo), markup)
+ self.assertEqual(str(soup.foo), markup)
def test_namespaced_attributes_xml_namespace(self):
markup = 'bar'
soup = self.soup(markup)
- self.assertEqual(unicode(soup.foo), markup)
+ self.assertEqual(str(soup.foo), markup)
def test_find_by_prefixed_name(self):
doc = """
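testing.py is mostly literal churn: the `u''` prefixes are dropped because every Python 3 string literal is text, and deprecated `assertEquals` spellings become `assertEqual`. For example:

    # Both literals below are the same str type on Python 3; the u'' prefix
    # has been legal-but-redundant again since Python 3.3, so this fixer is
    # purely cosmetic there.
    assert u"Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!" == "Sacr\xe9 bleu!"
    print("Sacr\xe9 bleu!".encode("utf-8"))  # b'Sacr\xc3\xa9 bleu!'
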
--- ./bs4/builder/__init__.py (original)
+++ ./bs4/builder/__init__.py (refactored)
@@ -205,13 +205,13 @@
universal = self.cdata_list_attributes.get('*', [])
tag_specific = self.cdata_list_attributes.get(
tag_name.lower(), None)
- for attr in attrs.keys():
+ for attr in list(attrs.keys()):
if attr in universal or (tag_specific and attr in tag_specific):
# We have a "class"-type attribute whose string
# value is a whitespace-separated list of
# values. Split it into a list.
RefactoringTool: Refactored ./bs4/builder/_html5lib.py
RefactoringTool: Refactored ./bs4/builder/_htmlparser.py
RefactoringTool: Refactored ./bs4/builder/_lxml.py
value = attrs[attr]
- if isinstance(value, basestring):
+ if isinstance(value, str):
values = nonwhitespace_re.findall(value)
else:
# html5lib sometimes calls setAttributes twice
@@ -369,7 +369,7 @@
"""
if isinstance(message_or_exception, Exception):
e = message_or_exception
- message_or_exception = "%s: %s" % (e.__class__.__name__, unicode(e))
+ message_or_exception = "%s: %s" % (e.__class__.__name__, str(e))
super(ParserRejectedMarkup, self).__init__(message_or_exception)
# Builders are registered in reverse order of priority, so that custom
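The builder hunk above shows the dict-view fixer: `attrs.keys()` and `.items()` return live views on Python 3, so 2to3 conservatively wraps them in `list()` wherever the loop body might mutate the dict (bs4's own loop only reassigns values, which would have been safe anyway). Why the snapshot matters:

    # Resizing a dict while iterating a live view raises RuntimeError.
    attrs = {"class": "a b", "id": "x", "xml:lang": "en"}  # hypothetical attrs
    for attr in list(attrs.keys()):
        if attr.startswith("xml:"):
            del attrs[attr]        # safe only because of the list() snapshot
    print(attrs)                   # {'class': 'a b', 'id': 'x'}
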
--- ./bs4/builder/_html5lib.py (original)
+++ ./bs4/builder/_html5lib.py (refactored)
@@ -33,7 +33,7 @@
# Pre-0.99999999
from html5lib.treebuilders import _base as treebuilder_base
new_html5lib = False
-except ImportError, e:
+except ImportError as e:
# 0.99999999 and up
from html5lib.treebuilders import base as treebuilder_base
new_html5lib = True
@@ -68,7 +68,7 @@
parser = html5lib.HTMLParser(tree=self.create_treebuilder)
self.underlying_builder.parser = parser
extra_kwargs = dict()
- if not isinstance(markup, unicode):
+ if not isinstance(markup, str):
if new_html5lib:
extra_kwargs['override_encoding'] = self.user_specified_encoding
else:
@@ -76,13 +76,13 @@
doc = parser.parse(markup, **extra_kwargs)
# Set the character encoding detected by the tokenizer.
- if isinstance(markup, unicode):
+ if isinstance(markup, str):
# We need to special-case this because html5lib sets
# charEncoding to UTF-8 if it gets Unicode input.
doc.original_encoding = None
else:
original_encoding = parser.tokenizer.stream.charEncoding[0]
- if not isinstance(original_encoding, basestring):
+ if not isinstance(original_encoding, str):
# In 0.99999999 and up, the encoding is an html5lib
# Encoding object. We want to use a string for compatibility
# with other tree builders.
@@ -99,7 +99,7 @@
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
- return u'<html><head></head><body>%s</body></html>' % fragment
+ return '<html><head></head><body>%s</body></html>' % fragment
class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
@@ -203,7 +203,7 @@
rv.append("|%s<%s>" % (' ' * indent, name))
if element.attrs:
attributes = []
- for name, value in element.attrs.items():
+ for name, value in list(element.attrs.items()):
if isinstance(name, NamespacedAttribute):
name = "%s %s" % (prefixes[name.namespace], name.name)
if isinstance(value, list):
@@ -258,7 +258,7 @@
def appendChild(self, node):
string_child = child = None
- if isinstance(node, basestring):
+ if isinstance(node, str):
# Some other piece of code decided to pass in a string
# instead of creating a TextElement object to contain the
# string.
@@ -275,7 +275,7 @@
child = node.element
node.parent = self
- if not isinstance(child, basestring) and child.parent is not None:
+ if not isinstance(child, str) and child.parent is not None:
node.element.extract()
if (string_child is not None and self.element.contents
@@ -288,7 +288,7 @@
old_element.replace_with(new_element)
self.soup._most_recent_element = new_element
else:
- if isinstance(node, basestring):
+ if isinstance(node, str):
# Create a brand new NavigableString from this string.
child = self.soup.new_string(node)
@@ -328,7 +328,7 @@
self.soup.builder._replace_cdata_list_attribute_values(
self.name, attributes)
- for name, value in attributes.items():
+ for name, value in list(attributes.items()):
self.element[name] = value
# The attributes may contain variables that need substitution.
--- ./bs4/builder/_htmlparser.py (original)
+++ ./bs4/builder/_htmlparser.py (refactored)
@@ -8,11 +8,11 @@
'HTMLParserTreeBuilder',
]
-from HTMLParser import HTMLParser
+from html.parser import HTMLParser
try:
- from HTMLParser import HTMLParseError
-except ImportError, e:
+ from html.parser import HTMLParseError
+except ImportError as e:
# HTMLParseError is removed in Python 3.5. Since it can never be
# thrown in 3.5, we can just define our own class as a placeholder.
class HTMLParseError(Exception):
@@ -157,14 +157,14 @@
continue
try:
data = bytearray([real_name]).decode(encoding)
- except UnicodeDecodeError, e:
+ except UnicodeDecodeError as e:
pass
if not data:
try:
- data = unichr(real_name)
- except (ValueError, OverflowError), e:
+ data = chr(real_name)
+ except (ValueError, OverflowError) as e:
pass
- data = data or u"\N{REPLACEMENT CHARACTER}"
+ data = data or "\N{REPLACEMENT CHARACTER}"
self.handle_data(data)
def handle_entityref(self, name):
@@ -239,7 +239,7 @@
declared within markup, whether any characters had to be
replaced with REPLACEMENT CHARACTER).
"""
- if isinstance(markup, unicode):
+ if isinstance(markup, str):
yield (markup, None, None, False)
return
@@ -257,7 +257,7 @@
try:
parser.feed(markup)
parser.close()
- except HTMLParseError, e:
+ except HTMLParseError as e:
warnings.warn(RuntimeWarning(
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
raise e
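_htmlparser.py picks up `unichr` → `chr` plus the exception re-spelling. The converted character-reference fallback in isolation (hypothetical function name):

    # chr() replaces unichr(); code points outside the Unicode range
    # degrade to U+FFFD.
    def decode_charref(codepoint):
        data = None
        try:
            data = chr(codepoint)              # was: unichr(codepoint)
        except (ValueError, OverflowError):    # was: except (...), e
            pass
        return data or "\N{REPLACEMENT CHARACTER}"

    print(decode_charref(0xE9))       # 'é'
    print(decode_charref(0x110000))   # '\ufffd': past the Unicode range
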
--- ./bs4/builder/_lxml.py (original)
+++ ./bs4/builder/_lxml.py (refactored)
@@ -8,11 +8,11 @@
try:
from collections.abc import Callable # Python 3.6
-except ImportError , e:
+except ImportError as e:
from collections import Callable
from io import BytesIO
-from StringIO import StringIO
+from io import StringIO
from lxml import etree
from bs4.element import (
Comment,
@@ -35,7 +35,7 @@
def _invert(d):
"Invert a dictionary."
- return dict((v,k) for k, v in d.items())
+ return dict((v,k) for k, v in list(d.items()))
class LXMLTreeBuilderForXML(TreeBuilder):
DEFAULT_PARSER_CLASS = etree.XMLParser
@@ -76,7 +76,7 @@
This might be useful later on when creating CSS selectors.
"""
- for key, value in mapping.items():
+ for key, value in list(mapping.items()):
if key and key not in self.soup._namespaces:
# Let the BeautifulSoup object know about a new namespace.
# If there are multiple namespaces defined with the same
@@ -139,12 +139,12 @@
else:
self.processing_instruction_class = XMLProcessingInstruction
- if isinstance(markup, unicode):
+ if isinstance(markup, str):
# We were given Unicode. Maybe lxml can parse Unicode on
# this system?
yield markup, None, document_declared_encoding, False
- if isinstance(markup, unicode):
+ if isinstance(markup, str):
# No, apparently not. Convert the Unicode to UTF-8 and
# tell lxml to parse it as UTF-8.
yield (markup.encode("utf8"), "utf8",
RefactoringTool: No changes to ./bs4/tests/__init__.py
RefactoringTool: No changes to ./bs4/tests/test_builder_registry.py
RefactoringTool: No changes to ./bs4/tests/test_docs.py
RefactoringTool: Refactored ./bs4/tests/test_html5lib.py
RefactoringTool: No changes to ./bs4/tests/test_htmlparser.py
RefactoringTool: Refactored ./bs4/tests/test_lxml.py
RefactoringTool: Refactored ./bs4/tests/test_soup.py
@@ -159,7 +159,7 @@
def feed(self, markup):
if isinstance(markup, bytes):
markup = BytesIO(markup)
- elif isinstance(markup, unicode):
+ elif isinstance(markup, str):
markup = StringIO(markup)
# Call feed() at least once, even if the markup is empty,
@@ -174,7 +174,7 @@
if len(data) != 0:
self.parser.feed(data)
self.parser.close()
- except (UnicodeDecodeError, LookupError, etree.ParserError), e:
+ except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
raise ParserRejectedMarkup(e)
def close(self):
@@ -203,7 +203,7 @@
# Also treat the namespace mapping as a set of attributes on the
# tag, so we can recreate it later.
attrs = attrs.copy()
- for prefix, namespace in nsmap.items():
+ for prefix, namespace in list(nsmap.items()):
attribute = NamespacedAttribute(
"xmlns", prefix, "http://www.w3.org/2000/xmlns/")
attrs[attribute] = namespace
@@ -212,7 +212,7 @@
# from lxml with namespaces attached to their names, and
# turn then into NamespacedAttribute objects.
new_attrs = {}
- for attr, value in attrs.items():
+ for attr, value in list(attrs.items()):
namespace, attr = self._getNsTag(attr)
if namespace is None:
new_attrs[attr] = value
@@ -272,7 +272,7 @@
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
- return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
+ return '<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
@@ -293,10 +293,10 @@
self.parser = self.parser_for(encoding)
self.parser.feed(markup)
self.parser.close()
- except (UnicodeDecodeError, LookupError, etree.ParserError), e:
+ except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
raise ParserRejectedMarkup(e)
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
- return u'<html><body>%s</body></html>' % fragment
+ return '<html><body>%s</body></html>' % fragment
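_lxml.py swaps `StringIO` for its io equivalent; after the change both byte and text markup wrap cleanly for lxml's feed interface. A sketch of the same dispatch (requires lxml; `as_stream` is a hypothetical helper name):

    from io import BytesIO, StringIO
    from lxml import etree

    def as_stream(markup):
        # Mirrors the feed() dispatch above: bytes -> BytesIO, str -> StringIO.
        return BytesIO(markup) if isinstance(markup, bytes) else StringIO(markup)

    root = etree.parse(as_stream("<p>hi</p>"), etree.HTMLParser()).getroot()
    print(root.tag)  # 'html' -- lxml wraps the fragment in a full document
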
--- ./bs4/tests/test_html5lib.py (original)
+++ ./bs4/tests/test_html5lib.py (refactored)
@@ -5,7 +5,7 @@
try:
from bs4.builder import HTML5TreeBuilder
HTML5LIB_PRESENT = True
-except ImportError, e:
+except ImportError as e:
HTML5LIB_PRESENT = False
from bs4.element import SoupStrainer
from bs4.testing import (
@@ -74,14 +74,14 @@
def test_reparented_markup(self):
markup = 'foo
\nbar
'
soup = self.soup(markup)
- self.assertEqual(u"foo
\nbar
", soup.body.decode())
+ self.assertEqual("foo
\nbar
", soup.body.decode())
self.assertEqual(2, len(soup.find_all('p')))
def test_reparented_markup_ends_with_whitespace(self):
markup = 'foo
\nbar
\n'
soup = self.soup(markup)
- self.assertEqual(u"foo
\nbar
\n", soup.body.decode())
+ self.assertEqual("foo
\nbar
\n", soup.body.decode())
self.assertEqual(2, len(soup.find_all('p')))
def test_reparented_markup_containing_identical_whitespace_nodes(self):
@@ -127,7 +127,7 @@
def test_foster_parenting(self):
markup = b"""A"""
soup = self.soup(markup)
- self.assertEqual(u"A", soup.body.decode())
+ self.assertEqual("A", soup.body.decode())
def test_extraction(self):
"""
--- ./bs4/tests/test_lxml.py (original)
+++ ./bs4/tests/test_lxml.py (refactored)
@@ -7,7 +7,7 @@
import lxml.etree
LXML_PRESENT = True
LXML_VERSION = lxml.etree.LXML_VERSION
-except ImportError, e:
+except ImportError as e:
LXML_PRESENT = False
LXML_VERSION = (0,)
@@ -68,7 +68,7 @@
# if one is installed.
with warnings.catch_warnings(record=True) as w:
soup = BeautifulStoneSoup("")
- self.assertEqual(u"", unicode(soup.b))
+ self.assertEqual("", str(soup.b))
self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))
def test_tracking_line_numbers(self):
--- ./bs4/tests/test_soup.py (original)
+++ ./bs4/tests/test_soup.py (refactored)
@@ -49,17 +49,17 @@
class TestConstructor(SoupTest):
def test_short_unicode_input(self):
- data = u"éé"
+ data = "éé"
soup = self.soup(data)
- self.assertEqual(u"éé", soup.h1.string)
+ self.assertEqual("éé", soup.h1.string)
def test_embedded_null(self):
- data = u"foo\0bar"
+ data = "foo\0bar"
soup = self.soup(data)
- self.assertEqual(u"foo\0bar", soup.h1.string)
+ self.assertEqual("foo\0bar", soup.h1.string)
def test_exclude_encodings(self):
- utf8_data = u"Räksmörgås".encode("utf-8")
+ utf8_data = "Räksmörgås".encode("utf-8")
soup = self.soup(utf8_data, exclude_encodings=["utf-8"])
self.assertEqual("windows-1252", soup.original_encoding)
@@ -124,7 +124,7 @@
yield markup, None, None, False
import re
- self.assertRaisesRegexp(
+ self.assertRaisesRegex(
ParserRejectedMarkup,
"The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.",
BeautifulSoup, '', builder=Mock,
@@ -261,7 +261,7 @@
with warnings.catch_warnings(record=True) as warning_list:
# note - this url must differ from the bytes one otherwise
# python's warnings system swallows the second warning
- soup = self.soup(u"http://www.crummyunicode.com/")
+ soup = self.soup("http://www.crummyunicode.com/")
self.assertTrue(any("looks like a URL" in str(w.message)
for w in warning_list))
@@ -273,7 +273,7 @@
def test_url_warning_with_unicode_and_space(self):
with warnings.catch_warnings(record=True) as warning_list:
- soup = self.soup(u"http://www.crummyuncode.com/ is great")
+ soup = self.soup("http://www.crummyuncode.com/ is great")
self.assertFalse(any("looks like a URL" in str(w.message)
for w in warning_list))
@@ -295,9 +295,9 @@
def test_simple_html_substitution(self):
# Unicode characters corresponding to named HTML entites
# are substituted, and no others.
- s = u"foo\u2200\N{SNOWMAN}\u00f5bar"
+ s = "foo\u2200\N{SNOWMAN}\u00f5bar"
self.assertEqual(self.sub.substitute_html(s),
- u"foo∀\N{SNOWMAN}õbar")
+ "foo∀\N{SNOWMAN}õbar")
def test_smart_quote_substitution(self):
# MS smart quotes are a common source of frustration, so we
@@ -362,7 +362,7 @@
def setUp(self):
super(TestEncodingConversion, self).setUp()
- self.unicode_data = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!'
+ self.unicode_data = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!'
self.utf8_data = self.unicode_data.encode("utf-8")
# Just so you know what it looks like.
self.assertEqual(
@@ -382,7 +382,7 @@
ascii = b"a"
soup_from_ascii = self.soup(ascii)
unicode_output = soup_from_ascii.decode()
- self.assertTrue(isinstance(unicode_output, unicode))
+ self.assertTrue(isinstance(unicode_output, str))
self.assertEqual(unicode_output, self.document_for(ascii.decode()))
self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
finally:
@@ -394,7 +394,7 @@
# is not set.
soup_from_unicode = self.soup(self.unicode_data)
self.assertEqual(soup_from_unicode.decode(), self.unicode_data)
- self.assertEqual(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!')
+ self.assertEqual(soup_from_unicode.foo.string, 'Sacr\xe9 bleu!')
self.assertEqual(soup_from_unicode.original_encoding, None)
def test_utf8_in_unicode_out(self):
@@ -402,7 +402,7 @@
# attribute is set.
soup_from_utf8 = self.soup(self.utf8_data)
self.assertEqual(soup_from_utf8.decode(), self.unicode_data)
- self.assertEqual(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!')
+ self.assertEqual(soup_from_utf8.foo.string, 'Sacr\xe9 bleu!')
def test_utf8_out(self):
# The internal data structures can be encoded as UTF-8.
@@ -413,14 +413,14 @@
PYTHON_3_PRE_3_2,
"Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
def test_attribute_name_containing_unicode_characters(self):
- markup = u''
+ markup = ''
self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8"))
class TestUnicodeDammit(unittest.TestCase):
"""Standalone tests of UnicodeDammit."""
def test_unicode_input(self):
- markup = u"I'm already Unicode! \N{SNOWMAN}"
+ markup = "I'm already Unicode! \N{SNOWMAN}"
dammit = UnicodeDammit(markup)
self.assertEqual(dammit.unicode_markup, markup)
@@ -428,7 +428,7 @@
markup = b"\x91\x92\x93\x94"
dammit = UnicodeDammit(markup)
self.assertEqual(
- dammit.unicode_markup, u"\u2018\u2019\u201c\u201d")
+ dammit.unicode_markup, "\u2018\u2019\u201c\u201d")
def test_smart_quotes_to_xml_entities(self):
markup = b"\x91\x92\x93\x94"
@@ -452,14 +452,14 @@
utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83"
dammit = UnicodeDammit(utf8)
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
- self.assertEqual(dammit.unicode_markup, u'Sacr\xe9 bleu! \N{SNOWMAN}')
+ self.assertEqual(dammit.unicode_markup, 'Sacr\xe9 bleu! \N{SNOWMAN}')
def test_convert_hebrew(self):
hebrew = b"\xed\xe5\xec\xf9"
dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
self.assertEqual(dammit.original_encoding.lower(), 'iso-8859-8')
- self.assertEqual(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9')
+ self.assertEqual(dammit.unicode_markup, '\u05dd\u05d5\u05dc\u05e9')
def test_dont_see_smart_quotes_where_there_are_none(self):
utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
@@ -468,19 +468,19 @@
self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8)
def test_ignore_inappropriate_codecs(self):
- utf8_data = u"Räksmörgås".encode("utf-8")
+ utf8_data = "Räksmörgås".encode("utf-8")
dammit = UnicodeDammit(utf8_data, ["iso-8859-8"])
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
def test_ignore_invalid_codecs(self):
- utf8_data = u"Räksmörgås".encode("utf-8")
+ utf8_data = "Räksmörgås".encode("utf-8")
for bad_encoding in ['.utf8', '...', 'utF---16.!']:
dammit = UnicodeDammit(utf8_data, [bad_encoding])
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
def test_exclude_encodings(self):
# This is UTF-8.
- utf8_data = u"Räksmörgås".encode("utf-8")
+ utf8_data = "Räksmörgås".encode("utf-8")
# But if we exclude UTF-8 from consideration, the guess is
# Windows-1252.
@@ -496,7 +496,7 @@
detected = EncodingDetector(
b'')
encodings = list(detected.encodings)
- assert u'utf-\N{REPLACEMENT CHARACTER}' in encodings
+ assert 'utf-\N{REPLACEMENT CHARACTER}' in encodings
def test_detect_html5_style_meta_tag(self):
@@ -536,7 +536,7 @@
bs4.dammit.chardet_dammit = noop
dammit = UnicodeDammit(doc)
self.assertEqual(True, dammit.contains_replacement_characters)
- self.assertTrue(u"\ufffd" in dammit.unicode_markup)
+ self.assertTrue("\ufffd" in dammit.unicode_markup)
soup = BeautifulSoup(doc, "html.parser")
self.assertTrue(soup.contains_replacement_characters)
@@ -548,17 +548,17 @@
# A document written in UTF-16LE will have its byte order marker stripped.
data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00'
dammit = UnicodeDammit(data)
- self.assertEqual(u"áé", dammit.unicode_markup)
+ self.assertEqual("áé", dammit.unicode_markup)
self.assertEqual("utf-16le", dammit.original_encoding)
def test_detwingle(self):
# Here's a UTF8 document.
- utf8 = (u"\N{SNOWMAN}" * 3).encode("utf8")
+ utf8 = ("\N{SNOWMAN}" * 3).encode("utf8")
# Here's a Windows-1252 document.
windows_1252 = (
- u"\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
- u"\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")
+ "\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
+ "\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")
# Through some unholy alchemy, they've been stuck together.
doc = utf8 + windows_1252 + utf8
@@ -573,7 +573,7 @@
fixed = UnicodeDammit.detwingle(doc)
self.assertEqual(
- u"☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))
+ "☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))
def test_detwingle_ignores_multibyte_characters(self):
# Each of these characters has a UTF-8 representation ending
@@ -581,9 +581,9 @@
# Windows-1252. But our code knows to skip over multibyte
# UTF-8 characters, so they'll survive the process unscathed.
for tricky_unicode_char in (
- u"\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
- u"\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
- u"\xf0\x90\x90\x93", # This is a CJK character, not sure which one.
+ "\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
+ "\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
+ "\xf0\x90\x90\x93", # This is a CJK character, not sure which one.
):
input = tricky_unicode_char.encode("utf8")
self.assertTrue(input.endswith(b'\x93'))
@@ -598,29 +598,29 @@
# interesting to know what encoding was claimed
# originally.
- html_unicode = u''
+ html_unicode = ''
html_bytes = html_unicode.encode("ascii")
- xml_unicode= u''
+ xml_unicode= ''
xml_bytes = xml_unicode.encode("ascii")
m = EncodingDetector.find_declared_encoding
- self.assertEquals(None, m(html_unicode, is_html=False))
- self.assertEquals("utf-8", m(html_unicode, is_html=True))
- self.assertEquals("utf-8", m(html_bytes, is_html=True))
-
- self.assertEquals("iso-8859-1", m(xml_unicode))
- self.assertEquals("iso-8859-1", m(xml_bytes))
+ self.assertEqual(None, m(html_unicode, is_html=False))
+ self.assertEqual("utf-8", m(html_unicode, is_html=True))
+ self.assertEqual("utf-8", m(html_bytes, is_html=True))
+
+ self.assertEqual("iso-8859-1", m(xml_unicode))
+ self.assertEqual("iso-8859-1", m(xml_bytes))RefactoringTool: Refactored ./bs4/tests/test_tree.py
# Normally, only the first few kilobytes of a document are checked for
# an encoding.
spacer = b' ' * 5000
- self.assertEquals(None, m(spacer + html_bytes))
- self.assertEquals(None, m(spacer + xml_bytes))
+ self.assertEqual(None, m(spacer + html_bytes))
+ self.assertEqual(None, m(spacer + xml_bytes))
# But you can tell find_declared_encoding to search an entire
# HTML document.
- self.assertEquals(
+ self.assertEqual(
"utf-8",
m(spacer + html_bytes, is_html=True, search_entire_document=True)
)
@@ -628,11 +628,11 @@
# The XML encoding declaration has to be the very first thing
# in the document. We'll allow whitespace before the document
# starts, but nothing else.
- self.assertEquals(
+ self.assertEqual(
"iso-8859-1",
m(xml_bytes, search_entire_document=True)
)
- self.assertEquals(
+ self.assertEqual(
None, m(b'a' + xml_bytes, search_entire_document=True)
)
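The test-suite hunks also pick up the `asserts` fixer: `assertEquals` and `assertRaisesRegexp` are long-deprecated aliases of `assertEqual` and `assertRaisesRegex`. A self-contained illustration:

    import unittest

    class Renames(unittest.TestCase):
        def test_modern_spellings(self):
            self.assertEqual(1 + 1, 2)   # was: assertEquals
            with self.assertRaisesRegex(ValueError, "invalid literal"):
                int("not a number")      # was: assertRaisesRegexp

    unittest.main(argv=["renames"], exit=False)
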
--- ./bs4/tests/test_tree.py (original)
+++ ./bs4/tests/test_tree.py (refactored)
@@ -71,13 +71,13 @@
self.assertEqual(soup.find("b").string, "2")
def test_unicode_text_find(self):
- soup = self.soup(u'Räksmörgås')
- self.assertEqual(soup.find(string=u'Räksmörgås'), u'Räksmörgås')
+ soup = self.soup('Räksmörgås')
+ self.assertEqual(soup.find(string='Räksmörgås'), 'Räksmörgås')
def test_unicode_attribute_find(self):
- soup = self.soup(u'here it is')
+ soup = self.soup('here it is')
str(soup)
- self.assertEqual("here it is", soup.find(id=u'Räksmörgås').text)
+ self.assertEqual("here it is", soup.find(id='Räksmörgås').text)
def test_find_everything(self):
@@ -97,17 +97,17 @@
"""You can search the tree for text nodes."""
soup = self.soup("Foobar\xbb")
# Exact match.
- self.assertEqual(soup.find_all(string="bar"), [u"bar"])
- self.assertEqual(soup.find_all(text="bar"), [u"bar"])
+ self.assertEqual(soup.find_all(string="bar"), ["bar"])
+ self.assertEqual(soup.find_all(text="bar"), ["bar"])
# Match any of a number of strings.
self.assertEqual(
- soup.find_all(text=["Foo", "bar"]), [u"Foo", u"bar"])
+ soup.find_all(text=["Foo", "bar"]), ["Foo", "bar"])
# Match a regular expression.
self.assertEqual(soup.find_all(text=re.compile('.*')),
- [u"Foo", u"bar", u'\xbb'])
+ ["Foo", "bar", '\xbb'])
# Match anything.
self.assertEqual(soup.find_all(text=True),
- [u"Foo", u"bar", u'\xbb'])
+ ["Foo", "bar", '\xbb'])
def test_find_all_limit(self):
"""You can limit the number of items returned by find_all."""
@@ -250,8 +250,8 @@
["Matching a.", "Matching b."])
def test_find_all_by_utf8_attribute_value(self):
- peace = u"םולש".encode("utf8")
- data = u''.encode("utf8")
+ peace = "םולש".encode("utf8")
+ data = ''.encode("utf8")
soup = self.soup(data)
self.assertEqual([soup.a], soup.find_all(title=peace))
self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8")))
@@ -440,7 +440,7 @@
# output.
# Since the tag has two children, its .string is None.
- self.assertEquals(None, div.span.string)
+ self.assertEqual(None, div.span.string)
self.assertEqual(7, len(div.contents))
div.smooth()
@@ -751,18 +751,18 @@
# No list of whitespace-preserving tags -> pretty-print
tag._preserve_whitespace_tags = None
- self.assertEquals(True, tag._should_pretty_print(0))
+ self.assertEqual(True, tag._should_pretty_print(0))
# List exists but tag is not on the list -> pretty-print
tag.preserve_whitespace_tags = ["some_other_tag"]
- self.assertEquals(True, tag._should_pretty_print(1))
+ self.assertEqual(True, tag._should_pretty_print(1))
# Indent level is None -> don't pretty-print
- self.assertEquals(False, tag._should_pretty_print(None))
+ self.assertEqual(False, tag._should_pretty_print(None))
# Tag is on the whitespace-preserving list -> don't pretty-print
tag.preserve_whitespace_tags = ["some_other_tag", "a_tag"]
- self.assertEquals(False, tag._should_pretty_print(1))
+ self.assertEqual(False, tag._should_pretty_print(1))
class TestTagCreation(SoupTest):
@@ -901,10 +901,10 @@
assert not isinstance(i, BeautifulSoup)
p1, p2, p3, p4 = list(soup.children)
- self.assertEquals("And now, a word:", p1.string)
- self.assertEquals("p2", p2.string)
- self.assertEquals("p3", p3.string)
- self.assertEquals("And we're back.", p4.string)
+ self.assertEqual("And now, a word:", p1.string)
+ self.assertEqual("p2", p2.string)
+ self.assertEqual("p3", p3.string)
+ self.assertEqual("And we're back.", p4.string)
def test_replace_with_maintains_next_element_throughout(self):
@@ -1249,7 +1249,7 @@
""")
[soup.script.extract() for i in soup.find_all("script")]
- self.assertEqual("\n\n\n", unicode(soup.body))
+ self.assertEqual("\n\n\n", str(soup.body))
def test_extract_works_when_element_is_surrounded_by_identical_strings(self):
@@ -1487,7 +1487,7 @@
soup = BeautifulSoup(b' ', 'html.parser')
encoding = soup.original_encoding
copy = soup.__copy__()
- self.assertEqual(u" ", unicode(copy))
+ self.assertEqual(" ", str(copy))
self.assertEqual(encoding, copy.original_encoding)
def test_copy_preserves_builder_information(self):
@@ -1517,14 +1517,14 @@
def test_unicode_pickle(self):
# A tree containing Unicode characters can be pickled.
- html = u"\N{SNOWMAN}"
+ html = "\N{SNOWMAN}"
soup = self.soup(html)
dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL)
loaded = pickle.loads(dumped)
self.assertEqual(loaded.decode(), soup.decode())
def test_copy_navigablestring_is_not_attached_to_tree(self):
- html = u"FooBar"
+ html = "FooBar"
soup = self.soup(html)
s1 = soup.find(string="Foo")
s2 = copy.copy(s1)
@@ -1536,7 +1536,7 @@
self.assertEqual(None, s2.previous_element)
def test_copy_navigablestring_subclass_has_same_type(self):
- html = u""
+ html = ""
soup = self.soup(html)
s1 = soup.string
s2 = copy.copy(s1)
@@ -1544,19 +1544,19 @@
self.assertTrue(isinstance(s2, Comment))
def test_copy_entire_soup(self):
- html = u"end"
+ html = "end"
soup = self.soup(html)
soup_copy = copy.copy(soup)
self.assertEqual(soup, soup_copy)
def test_copy_tag_copies_contents(self):
- html = u"end"
+ html = "end"
soup = self.soup(html)
div = soup.div
div_copy = copy.copy(div)
# The two tags look the same, and evaluate to equal.
- self.assertEqual(unicode(div), unicode(div_copy))
+ self.assertEqual(str(div), str(div_copy))
self.assertEqual(div, div_copy)
# But they're not the same object.
@@ -1572,17 +1572,17 @@
class TestSubstitutions(SoupTest):
def test_default_formatter_is_minimal(self):
- markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>"
+ markup = "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>"
soup = self.soup(markup)
decoded = soup.decode(formatter="minimal")
# The < is converted back into < but the e-with-acute is left alone.
self.assertEqual(
decoded,
self.document_for(
- u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>"))
+ "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>"))
def test_formatter_html(self):
- markup = u" <<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>"
+ markup = " <<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>"
soup = self.soup(markup)
decoded = soup.decode(formatter="html")
self.assertEqual(
@@ -1590,7 +1590,7 @@
self.document_for(" <<Sacré bleu!>>"))
def test_formatter_html5(self):
- markup = u" <<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>"
+ markup = " <<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>"
soup = self.soup(markup)
decoded = soup.decode(formatter="html5")
self.assertEqual(
@@ -1598,49 +1598,49 @@
self.document_for(" <<Sacré bleu!>>"))
def test_formatter_minimal(self):
- markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>"
+ markup = "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>"
soup = self.soup(markup)
decoded = soup.decode(formatter="minimal")
# The < is converted back into < but the e-with-acute is left alone.
self.assertEqual(
decoded,
self.document_for(
- u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>"))
+ "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>"))
def test_formatter_null(self):
- markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>"
+ markup = "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>"
soup = self.soup(markup)
decoded = soup.decode(formatter=None)
# Neither the angle brackets nor the e-with-acute are converted.
# This is not valid HTML, but it's what the user wanted.
self.assertEqual(decoded,
- self.document_for(u"<>"))
+ self.document_for("<>"))
def test_formatter_custom(self):
- markup = u"<foo>bar "
+ markup = "<foo>bar "
soup = self.soup(markup)
decoded = soup.decode(formatter = lambda x: x.upper())
# Instead of normal entity conversion code, the custom
# callable is called on every string.
self.assertEqual(
decoded,
- self.document_for(u"BAR "))
+ self.document_for("BAR "))
def test_formatter_is_run_on_attribute_values(self):
- markup = u'e'
+ markup = 'e'
soup = self.soup(markup)
a = soup.a
- expect_minimal = u'e'
+ expect_minimal = 'e'
self.assertEqual(expect_minimal, a.decode())
self.assertEqual(expect_minimal, a.decode(formatter="minimal"))
- expect_html = u'e'
+ expect_html = 'e'
self.assertEqual(expect_html, a.decode(formatter="html"))
self.assertEqual(markup, a.decode(formatter=None))
- expect_upper = u'E'
+ expect_upper = 'E'
self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper()))
def test_formatter_skips_script_tag_for_html_documents(self):
@@ -1666,7 +1666,7 @@
# Everything outside the tag is reformatted, but everything
# inside is left alone.
self.assertEqual(
- u'\n foo\n \tbar\n \n \n baz\n \n ',
+ '\n foo\n \tbar\n \n \n baz\n \n ',
soup.div.prettify())
def test_prettify_accepts_formatter_function(self):
@@ -1676,14 +1676,14 @@
def test_prettify_outputs_unicode_by_default(self):
soup = self.soup("")
- self.assertEqual(unicode, type(soup.prettify()))
+ self.assertEqual(str, type(soup.prettify()))
def test_prettify_can_encode_data(self):
soup = self.soup("")
self.assertEqual(bytes, type(soup.prettify("utf-8")))
def test_html_entity_substitution_off_by_default(self):
- markup = u"Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!"
+ markup = "Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!"
soup = self.soup(markup)
encoded = soup.b.encode("utf-8")
self.assertEqual(encoded, markup.encode('utf-8'))
@@ -1727,48 +1727,48 @@
"""Test the ability to encode objects into strings."""
def test_unicode_string_can_be_encoded(self):
- html = u"\N{SNOWMAN}"
+ html = "\N{SNOWMAN}"
soup = self.soup(html)
self.assertEqual(soup.b.string.encode("utf-8"),
- u"\N{SNOWMAN}".encode("utf-8"))
+ "\N{SNOWMAN}".encode("utf-8"))
def test_tag_containing_unicode_string_can_be_encoded(self):
- html = u"\N{SNOWMAN}"
+ html = "\N{SNOWMAN}"
soup = self.soup(html)
self.assertEqual(
soup.b.encode("utf-8"), html.encode("utf-8"))
def test_encoding_substitutes_unrecognized_characters_by_default(self):
- html = u"<b>\N{SNOWMAN}</b>"
+ html = "<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
self.assertEqual(soup.b.encode("ascii"), b"<b>&#9731;</b>")
def test_encoding_can_be_made_strict(self):
- html = u"<b>\N{SNOWMAN}</b>"
+ html = "<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
self.assertRaises(
UnicodeEncodeError, soup.encode, "ascii", errors="strict")
def test_decode_contents(self):
- html = u"<b>\N{SNOWMAN}</b>"
+ html = "<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
- self.assertEqual(u"\N{SNOWMAN}", soup.b.decode_contents())
+ self.assertEqual("\N{SNOWMAN}", soup.b.decode_contents())
def test_encode_contents(self):
- html = u"<b>\N{SNOWMAN}</b>"
+ html = "<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
self.assertEqual(
- u"\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents(
+ "\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents(
encoding="utf8"))
def test_deprecated_renderContents(self):
- html = u"<b>\N{SNOWMAN}</b>"
+ html = "<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
self.assertEqual(
- u"\N{SNOWMAN}".encode("utf8"), soup.b.renderContents())
+ "\N{SNOWMAN}".encode("utf8"), soup.b.renderContents())
def test_repr(self):
- html = u"<b>\N{SNOWMAN}</b>"
+ html = "<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
if PY3K:
self.assertEqual(html, repr(soup))
@@ -1794,8 +1794,8 @@
# attributes() was called on the tag. It filtered out one
# attribute and sorted the other two.
- self.assertEquals(formatter.called_with, soup.p)
- self.assertEquals(u' ', decoded)
+ self.assertEqual(formatter.called_with, soup.p)
+ self.assertEqual('', decoded)
RefactoringTool: Refactored ./doc/source/conf.py
RefactoringTool: Refactored ./doc.zh/source/conf.py
RefactoringTool: Refactored ./scripts/demonstrate_parser_differences.py
RefactoringTool: Files that were modified:
RefactoringTool: ./setup.py
RefactoringTool: ./bs4/__init__.py
RefactoringTool: ./bs4/check_block.py
RefactoringTool: ./bs4/dammit.py
RefactoringTool: ./bs4/diagnose.py
RefactoringTool: ./bs4/element.py
RefactoringTool: ./bs4/formatter.py
RefactoringTool: ./bs4/testing.py
RefactoringTool: ./bs4/builder/__init__.py
RefactoringTool: ./bs4/builder/_html5lib.py
RefactoringTool: ./bs4/builder/_htmlparser.py
RefactoringTool: ./bs4/builder/_lxml.py
RefactoringTool: ./bs4/tests/__init__.py
RefactoringTool: ./bs4/tests/test_builder_registry.py
RefactoringTool: ./bs4/tests/test_docs.py
RefactoringTool: ./bs4/tests/test_html5lib.py
RefactoringTool: ./bs4/tests/test_htmlparser.py
RefactoringTool: ./bs4/tests/test_lxml.py
RefactoringTool: ./bs4/tests/test_soup.py
RefactoringTool: ./bs4/tests/test_tree.py
RefactoringTool: ./doc/source/conf.py
RefactoringTool: ./doc.zh/source/conf.py
RefactoringTool: ./scripts/demonstrate_parser_differences.py
class TestNavigableStringSubclasses(SoupTest):
@@ -1914,7 +1914,7 @@
els = self.soup.select('title')
self.assertEqual(len(els), 1)
self.assertEqual(els[0].name, 'title')
- self.assertEqual(els[0].contents, [u'The title'])
+ self.assertEqual(els[0].contents, ['The title'])
def test_one_tag_many(self):
els = self.soup.select('div')
@@ -1960,7 +1960,7 @@
self.assertEqual(dashed[0]['id'], 'dash2')
def test_dashed_tag_text(self):
- self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, u'Hello there.')
+ self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, 'Hello there.')
def test_select_dashed_matches_find_all(self):
self.assertEqual(self.soup.select('custom-dashed-tag'), self.soup.find_all('custom-dashed-tag'))
@@ -2146,12 +2146,12 @@
# Try to select first paragraph
els = self.soup.select('div#inner p:nth-of-type(1)')
self.assertEqual(len(els), 1)
- self.assertEqual(els[0].string, u'Some text')
+ self.assertEqual(els[0].string, 'Some text')
# Try to select third paragraph
els = self.soup.select('div#inner p:nth-of-type(3)')
self.assertEqual(len(els), 1)
- self.assertEqual(els[0].string, u'Another')
+ self.assertEqual(els[0].string, 'Another')
# Try to select (non-existent!) fourth paragraph
els = self.soup.select('div#inner p:nth-of-type(4)')
@@ -2164,7 +2164,7 @@
def test_nth_of_type_direct_descendant(self):
els = self.soup.select('div#inner > p:nth-of-type(1)')
self.assertEqual(len(els), 1)
- self.assertEqual(els[0].string, u'Some text')
+ self.assertEqual(els[0].string, 'Some text')
def test_id_child_selector_nth_of_type(self):
self.assertSelects('#inner > p:nth-of-type(2)', ['p1'])
@@ -2245,7 +2245,7 @@
markup = ''
soup = BeautifulSoup(markup, 'html.parser')
selected = soup.select(".c1, .c2")
- self.assertEquals(3, len(selected))
+ self.assertEqual(3, len(selected))
# Verify that find_all finds the same elements, though because
# of an implementation detail it finds them in a different
--- ./doc/source/conf.py (original)
+++ ./doc/source/conf.py (refactored)
@@ -40,8 +40,8 @@
master_doc = 'index'
# General information about the project.
-project = u'Beautiful Soup'
-copyright = u'2004-2015, Leonard Richardson'
+project = 'Beautiful Soup'
+copyright = '2004-2015, Leonard Richardson'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
@@ -178,8 +178,8 @@
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
- ('index', 'BeautifulSoup.tex', u'Beautiful Soup Documentation',
- u'Leonard Richardson', 'manual'),
+ ('index', 'BeautifulSoup.tex', 'Beautiful Soup Documentation',
+ 'Leonard Richardson', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
@@ -211,18 +211,18 @@
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
- ('index', 'beautifulsoup', u'Beautiful Soup Documentation',
- [u'Leonard Richardson'], 1)
+ ('index', 'beautifulsoup', 'Beautiful Soup Documentation',
+ ['Leonard Richardson'], 1)
]
# -- Options for Epub output ---------------------------------------------------
# Bibliographic Dublin Core info.
-epub_title = u'Beautiful Soup'
-epub_author = u'Leonard Richardson'
-epub_publisher = u'Leonard Richardson'
-epub_copyright = u'2012, Leonard Richardson'
+epub_title = 'Beautiful Soup'
+epub_author = 'Leonard Richardson'
+epub_publisher = 'Leonard Richardson'
+epub_copyright = '2012, Leonard Richardson'
# The language of the text. It defaults to the language option
# or en if the language is not set.
--- ./doc.zh/source/conf.py (original)
+++ ./doc.zh/source/conf.py (refactored)
@@ -40,8 +40,8 @@
master_doc = 'index'
# General information about the project.
-project = u'Beautiful Soup'
-copyright = u'2012, Leonard Richardson'
+project = 'Beautiful Soup'
+copyright = '2012, Leonard Richardson'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
@@ -178,8 +178,8 @@
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
- ('index', 'BeautifulSoup.tex', u'Beautiful Soup Documentation',
- u'Leonard Richardson', 'manual'),
+ ('index', 'BeautifulSoup.tex', 'Beautiful Soup Documentation',
+ 'Leonard Richardson', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
@@ -211,18 +211,18 @@
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
- ('index', 'beautifulsoup', u'Beautiful Soup Documentation',
- [u'Leonard Richardson'], 1)
+ ('index', 'beautifulsoup', 'Beautiful Soup Documentation',
+ ['Leonard Richardson'], 1)
]
# -- Options for Epub output ---------------------------------------------------
# Bibliographic Dublin Core info.
-epub_title = u'Beautiful Soup'
-epub_author = u'Leonard Richardson'
-epub_publisher = u'Leonard Richardson'
-epub_copyright = u'2012, Leonard Richardson'
+epub_title = 'Beautiful Soup'
+epub_author = 'Leonard Richardson'
+epub_publisher = 'Leonard Richardson'
+epub_copyright = '2012, Leonard Richardson'
# The language of the text. It defaults to the language option
# or en if the language is not set.
--- ./scripts/demonstrate_parser_differences.py (original)
+++ ./scripts/demonstrate_parser_differences.py (refactored)
@@ -22,13 +22,13 @@
try:
from bs4.builder import _lxml
parsers.append('lxml')
-except ImportError, e:
+except ImportError as e:
pass
try:
from bs4.builder import _html5lib
parsers.append('html5lib')
-except ImportError, e:
+except ImportError as e:
pass
class Demonstration(object):
@@ -47,7 +47,7 @@
output = soup.div
else:
output = soup
- except Exception, e:
+ except Exception as e:
output = "[EXCEPTION] %s" % str(e)
self.results[parser] = output
if previous_output is None:
@@ -57,15 +57,15 @@
return uniform_results
def dump(self):
- print "%s: %s" % ("Markup".rjust(13), self.markup.encode("utf8"))
- for parser, output in self.results.items():
- print "%s: %s" % (parser.rjust(13), output.encode("utf8"))
+ print("%s: %s" % ("Markup".rjust(13), self.markup.encode("utf8")))
+ for parser, output in list(self.results.items()):
+ print("%s: %s" % (parser.rjust(13), output.encode("utf8")))
different_results = []
uniform_results = []
-print "= Testing the following parsers: %s =" % ", ".join(parsers)
-print
+print("= Testing the following parsers: %s =" % ", ".join(parsers))
+print()
input_file = sys.stdin
if sys.stdin.isatty():
@@ -83,13 +83,13 @@
else:
different_results.append(demo)
-print "== Markup that's handled the same in every parser =="
-print
+print("== Markup that's handled the same in every parser ==")
+print()
for demo in uniform_results:
demo.dump()
- print
-print "== Markup that's not handled the same in every parser =="
-print
+ print()
+print("== Markup that's not handled the same in every parser ==")
+print()
for demo in different_results:
demo.dump()
- print
+ print()
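
The hunks above all come from a handful of mechanical 2to3 fixers: fix_except (comma spelling to "as"), fix_print (statement to function), and the fix_unicode / fix_basestring fixers (u"" prefixes dropped, unicode and basestring replaced by str). A minimal sketch of the same rewrites in their Python 3 form, on a hypothetical snippet rather than code from this package:

    # fix_except: Python 2's "except ImportError, e:" becomes "except ImportError as e:"
    try:
        from bs4.builder import _lxml  # optional parser backend
    except ImportError as e:
        _lxml = None

    # fix_print: the Python 2 print statement becomes the print() function
    print("= Testing the following parsers: %s =" % ", ".join(["html.parser"]))

    # fix_unicode / fix_basestring: u"" prefixes drop; unicode and basestring become str
    markup = "Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!"  # was u"Sacr... bleu!"
    assert isinstance(markup, str)  # was isinstance(markup, basestring)
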
+ CFLAGS='-O2 -g -pipe -Wformat -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -fstack-protector --param=ssp-buffer-size=4 -fasynchronous-unwind-tables'
+ LDFLAGS=' -Wl,--as-needed -Wl,--no-undefined -Wl,-z,relro -Wl,-O1 -Wl,--build-id -Wl,--enable-new-dtags'
+ /usr/bin/python3 setup.py build '--executable=/usr/bin/python3 -s'
running build
running build_py
creating build
creating build/lib
creating build/lib/bs4
copying bs4/testing.py -> build/lib/bs4
copying bs4/element.py -> build/lib/bs4
copying bs4/dammit.py -> build/lib/bs4
copying bs4/check_block.py -> build/lib/bs4
copying bs4/formatter.py -> build/lib/bs4
copying bs4/diagnose.py -> build/lib/bs4
copying bs4/__init__.py -> build/lib/bs4
creating build/lib/bs4/tests
copying bs4/tests/test_html5lib.py -> build/lib/bs4/tests
copying bs4/tests/test_tree.py -> build/lib/bs4/tests
copying bs4/tests/test_lxml.py -> build/lib/bs4/tests
copying bs4/tests/test_htmlparser.py -> build/lib/bs4/tests
copying bs4/tests/test_soup.py -> build/lib/bs4/tests
copying bs4/tests/test_docs.py -> build/lib/bs4/tests
copying bs4/tests/__init__.py -> build/lib/bs4/tests
copying bs4/tests/test_builder_registry.py -> build/lib/bs4/tests
creating build/lib/bs4/builder
copying bs4/builder/_html5lib.py -> build/lib/bs4/builder
copying bs4/builder/_htmlparser.py -> build/lib/bs4/builder
copying bs4/builder/__init__.py -> build/lib/bs4/builder
copying bs4/builder/_lxml.py -> build/lib/bs4/builder
Fixing build/lib/bs4/testing.py build/lib/bs4/element.py build/lib/bs4/dammit.py build/lib/bs4/check_block.py build/lib/bs4/formatter.py build/lib/bs4/diagnose.py build/lib/bs4/__init__.py build/lib/bs4/tests/test_html5lib.py build/lib/bs4/tests/test_tree.py build/lib/bs4/tests/test_lxml.py build/lib/bs4/tests/test_htmlparser.py build/lib/bs4/tests/test_soup.py build/lib/bs4/tests/test_docs.py build/lib/bs4/tests/__init__.py build/lib/bs4/tests/test_builder_registry.py build/lib/bs4/builder/_html5lib.py build/lib/bs4/builder/_htmlparser.py build/lib/bs4/builder/__init__.py build/lib/bs4/builder/_lxml.py
Skipping optional fixer: buffer
Skipping optional fixer: idioms
Skipping optional fixer: set_literal
Skipping optional fixer: ws_comma
Fixing build/lib/bs4/testing.py build/lib/bs4/element.py build/lib/bs4/dammit.py build/lib/bs4/check_block.py build/lib/bs4/formatter.py build/lib/bs4/diagnose.py build/lib/bs4/__init__.py build/lib/bs4/tests/test_html5lib.py build/lib/bs4/tests/test_tree.py build/lib/bs4/tests/test_lxml.py build/lib/bs4/tests/test_htmlparser.py build/lib/bs4/tests/test_soup.py build/lib/bs4/tests/test_docs.py build/lib/bs4/tests/__init__.py build/lib/bs4/tests/test_builder_registry.py build/lib/bs4/builder/_html5lib.py build/lib/bs4/builder/_htmlparser.py build/lib/bs4/builder/__init__.py build/lib/bs4/builder/_lxml.py
Skipping optional fixer: buffer
Skipping optional fixer: idioms
Skipping optional fixer: set_literal
Skipping optional fixer: ws_comma
+ RPM_EC=0
++ jobs -p
+ exit 0
Executing(%install): /bin/sh -e /home/iurt/rpmbuild/tmp/rpm-tmp.PgN4M8
+ umask 022
+ cd /home/iurt/rpmbuild/BUILD
+ '[' 1 -eq 1 ']'
+ '[' /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64 '!=' / ']'
+ rm -rf /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64
++ dirname /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64
+ mkdir -p /home/iurt/rpmbuild/BUILDROOT
+ mkdir /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64
+ cd beautifulsoup4-4.8.1
+ '[' 1 -eq 1 ']'
+ CFLAGS='-O2 -g -pipe -Wformat -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -fstack-protector --param=ssp-buffer-size=4 -fasynchronous-unwind-tables'
+ LDFLAGS=' -Wl,--as-needed -Wl,--no-undefined -Wl,-z,relro -Wl,-O1 -Wl,--build-id -Wl,--enable-new-dtags'
+ /usr/bin/python3 setup.py install -O1 --skip-build --root /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64
running install
running install_lib
creating /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr
creating /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib
creating /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8
creating /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages
creating /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4
copying build/lib/bs4/testing.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4
creating /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/tests
copying build/lib/bs4/tests/test_html5lib.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/tests
copying build/lib/bs4/tests/test_tree.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/tests
copying build/lib/bs4/tests/test_lxml.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/tests
copying build/lib/bs4/tests/test_htmlparser.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/tests
copying build/lib/bs4/tests/test_soup.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/tests
copying build/lib/bs4/tests/test_docs.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/tests
copying build/lib/bs4/tests/__init__.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/tests
copying build/lib/bs4/tests/test_builder_registry.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/tests
copying build/lib/bs4/element.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4
copying build/lib/bs4/dammit.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4
copying build/lib/bs4/check_block.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4
copying build/lib/bs4/formatter.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4
creating /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/builder
copying build/lib/bs4/builder/_html5lib.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/builder
copying build/lib/bs4/builder/_htmlparser.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/builder
copying build/lib/bs4/builder/__init__.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/builder
copying build/lib/bs4/builder/_lxml.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/builder
copying build/lib/bs4/diagnose.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4
copying build/lib/bs4/__init__.py -> /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/testing.py to testing.cpython-38.pyc
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/tests/test_html5lib.py to test_html5lib.cpython-38.pyc
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/tests/test_tree.py to test_tree.cpython-38.pyc
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/tests/test_lxml.py to test_lxml.cpython-38.pyc
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/tests/test_htmlparser.py to test_htmlparser.cpython-38.pyc
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/tests/test_soup.py to test_soup.cpython-38.pyc
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/tests/test_docs.py to test_docs.cpython-38.pyc
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/tests/__init__.py to __init__.cpython-38.pyc
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/tests/test_builder_registry.py to test_builder_registry.cpython-38.pyc
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/element.py to element.cpython-38.pyc
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/dammit.py to dammit.cpython-38.pyc
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/check_block.py to check_block.cpython-38.pyc
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/formatter.py to formatter.cpython-38.pyc
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/builder/_html5lib.py to _html5lib.cpython-38.pyc
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/builder/_htmlparser.py to _htmlparser.cpython-38.pyc
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/builder/__init__.py to __init__.cpython-38.pyc
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/builder/_lxml.py to _lxml.cpython-38.pyc
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/diagnose.py to diagnose.cpython-38.pyc
byte-compiling /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/bs4/__init__.py to __init__.cpython-38.pyc
writing byte-compilation script '/tmp/tmpzl42ipi6.py'
/usr/bin/python3 /tmp/tmpzl42ipi6.py
removing /tmp/tmpzl42ipi6.py
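
Each "byte-compiling ... to ....cpython-38.pyc" line above corresponds, roughly, to a py_compile call issued from the temporary script that distutils writes and then removes (the /tmp/tmpzl42ipi6.py seen here). A minimal sketch of that core step, with an illustrative path:

    import py_compile

    # Compiles one source file to its .pyc; doraise=True surfaces syntax errors.
    py_compile.compile("bs4/__init__.py", doraise=True)
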
running install_egg_info
running egg_info
writing beautifulsoup4.egg-info/PKG-INFO
writing dependency_links to beautifulsoup4.egg-info/dependency_links.txt
writing requirements to beautifulsoup4.egg-info/requires.txt
writing top-level names to beautifulsoup4.egg-info/top_level.txt
reading manifest file 'beautifulsoup4.egg-info/SOURCES.txt'
reading manifest template 'MANIFEST.in'
writing manifest file 'beautifulsoup4.egg-info/SOURCES.txt'
Copying beautifulsoup4.egg-info to /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8/site-packages/beautifulsoup4-4.8.1-py3.8.egg-info
running install_scripts
+ /usr/lib/rpm/find-debuginfo.sh -j16 --strict-build-id -m -i --build-id-seed 4.8.1-1.mga8 --unique-debug-suffix -4.8.1-1.mga8.aarch64 --unique-debug-src-base python-beautifulsoup4-4.8.1-1.mga8.aarch64 --run-dwz --dwz-low-mem-die-limit 10000000 --dwz-max-die-limit 50000000 -S debugsourcefiles.list /home/iurt/rpmbuild/BUILD/beautifulsoup4-4.8.1
+ '[' -n '' ']'
+ /usr/share/spec-helper/clean_files
+ '[' -n '' ']'
+ /usr/share/spec-helper/compress_files .xz
+ '[' -n '' ']'
+ /usr/share/spec-helper/relink_symlinks
+ '[' -n '' ']'
+ /usr/share/spec-helper/clean_perl
+ '[' -n '' ']'
+ /usr/share/spec-helper/lib_symlinks
+ '[' -n '' ']'
+ /usr/share/spec-helper/gprintify
+ '[' -n '' ']'
+ /usr/share/spec-helper/fix_mo
+ '[' -n '' ']'
+ /usr/share/spec-helper/translate_menu
+ '[' -n '' ']'
+ /usr/share/spec-helper/fix_pamd
+ '[' -n '' ']'
+ /usr/share/spec-helper/remove_info_dir
+ '[' -n '' ']'
+ /usr/share/spec-helper/fix_eol
+ '[' -n '' ']'
+ /usr/share/spec-helper/check_desktop_files
+ '[' -n '' ']'
+ /usr/share/spec-helper/check_elf_files
+ /usr/lib/rpm/brp-python-bytecompile /usr/bin/python 1 1
Bytecompiling .py files below /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64/usr/lib/python3.8 using /usr/bin/python3.8
+ /usr/lib/rpm/brp-python-hardlink
+ /usr/lib/rpm/redhat/brp-mangle-shebangs
Processing files: python3-beautifulsoup4-4.8.1-1.mga8.noarch
Provides: python-beautifulsoup4 = 4.8.1-1.mga8 python3-beautifulsoup4 = 4.8.1-1.mga8 python3.8dist(beautifulsoup4) = 4.8.1 python3dist(beautifulsoup4) = 4.8.1 pythonegg(3)(beautifulsoup4) = 4.8.1
Requires(rpmlib): rpmlib(CompressedFileNames) <= 3.0.4-1 rpmlib(FileDigests) <= 4.6.0-1 rpmlib(PartialHardlinkSets) <= 4.0.4-1 rpmlib(PayloadFilesHavePrefix) <= 4.0-1
Requires: python(abi) = 3.8
Obsoletes: python-beautifulsoup4 < 4.8.1-1.mga8
Checking for unpackaged file(s): /usr/lib/rpm/check-files /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64
Wrote: /home/iurt/rpmbuild/RPMS/noarch/python3-beautifulsoup4-4.8.1-1.mga8.noarch.rpm
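
With the noarch package written, a quick smoke test of the packaged module (assuming python3-beautifulsoup4 is installed on the target system, and using the stdlib html.parser backend so no extra parser packages are needed):

    from bs4 import BeautifulSoup

    soup = BeautifulSoup("<p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p>", "html.parser")
    assert soup.p.string == "Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!"
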
Executing(%clean): /bin/sh -e /home/iurt/rpmbuild/tmp/rpm-tmp.DNimBb
+ umask 022
+ cd /home/iurt/rpmbuild/BUILD
+ cd beautifulsoup4-4.8.1
+ /usr/bin/rm -rf /home/iurt/rpmbuild/BUILDROOT/python-beautifulsoup4-4.8.1-1.mga8.aarch64
+ RPM_EC=0
++ jobs -p
+ exit 0
Executing(--clean): /bin/sh -e /home/iurt/rpmbuild/tmp/rpm-tmp.rllipb
+ umask 022
+ cd /home/iurt/rpmbuild/BUILD
+ rm -rf beautifulsoup4-4.8.1
+ RPM_EC=0
++ jobs -p
+ exit 0
D: [iurt_root_command] Success!