Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

import logging 

 

try:  # Python 3 

    from urllib.parse import urljoin 

except ImportError: 

    from urlparse import urljoin 

 

from ._collections import RecentlyUsedContainer 

from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool 

from .connectionpool import port_by_scheme 

from .exceptions import LocationValueError, MaxRetryError 

from .request import RequestMethods 

from .util.url import parse_url 

from .util.retry import Retry 

 

 

# Public names exported by ``from <module> import *``.
__all__ = [
    'PoolManager',
    'ProxyManager',
    'proxy_from_url',
]


# URL scheme -> connection pool class instantiated for that scheme.
pool_classes_by_scheme = dict(
    http=HTTPConnectionPool,
    https=HTTPSConnectionPool,
)

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Pool keyword arguments that are removed before a plain-HTTP pool is
# constructed (see PoolManager._new_pool).
SSL_KEYWORDS = (
    'key_file',
    'cert_file',
    'cert_reqs',
    'ca_certs',
    'ssl_version',
)

 

 

class PoolManager(RequestMethods):
    r"""
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example::

        >>> manager = PoolManager(num_pools=2)
        >>> r = manager.request('GET', 'http://google.com/')
        >>> r = manager.request('GET', 'http://google.com/mail')
        >>> r = manager.request('GET', 'http://yahoo.com/')
        >>> len(manager.pools)
        2

    """

    # No proxy by default; urlopen() checks this to decide whether to send
    # the full URL or only the request-uri.
    proxy = None

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        RequestMethods.__init__(self, headers)
        # Keyword arguments forwarded to every pool built by _new_pool().
        self.connection_pool_kw = connection_pool_kw
        # Pools keyed by (scheme, host, port). dispose_func closes a pool;
        # presumably the container calls it when it drops an entry.
        self.pools = RecentlyUsedContainer(num_pools,
                                           dispose_func=lambda p: p.close())

    def __enter__(self):
        """Return the manager itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Clear all pools on leaving the ``with`` block."""
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(self, scheme, host, port):
        """
        Create a new :class:`ConnectionPool` based on host, port and scheme.

        This method is used to actually create the connection pools handed out
        by :meth:`connection_from_url` and companion methods. It is intended
        to be overridden for customization.

        :raises KeyError:
            If ``scheme`` has no entry in ``pool_classes_by_scheme``.
        """
        pool_cls = pool_classes_by_scheme[scheme]
        kwargs = self.connection_pool_kw
        if scheme == 'http':
            # Work on a copy so the manager's stored keyword arguments keep
            # their SSL settings for later HTTPS pools.
            kwargs = self.connection_pool_kw.copy()
            for kw in SSL_KEYWORDS:
                kwargs.pop(kw, None)

        return pool_cls(host, port, **kwargs)

    def clear(self):
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme='http'):
        """
        Get a :class:`ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``.

        :raises LocationValueError:
            If ``host`` is empty or ``None``.
        """

        if not host:
            raise LocationValueError("No host specified.")

        # A falsy scheme falls back to 'http'; a falsy port falls back to the
        # scheme's default, or 80 when the scheme has no registered default.
        scheme = scheme or 'http'
        port = port or port_by_scheme.get(scheme, 80)
        pool_key = (scheme, host, port)

        # Look up and insert under the container's lock so both steps happen
        # together.
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            pool = self._new_pool(scheme, host, port)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(self, url):
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url` but
        doesn't pass any additional parameters to the
        :class:`urllib3.connectionpool.ConnectionPool` constructor.

        Additional parameters are taken from the :class:`.PoolManager`
        constructor.
        """
        u = parse_url(url)
        return self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        :param redirect:
            When true, a response carrying a redirect location is followed by
            calling this method again with the new location.

        :raises MaxRetryError:
            Re-raised from ``retries.increment`` when the retry object has
            ``raise_on_redirect`` set; otherwise the last response is returned.
        """
        u = parse_url(url)
        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are handled here rather than in the pool, so the pool is
        # told not to follow them and not to enforce same-host requests.
        kw['assert_same_host'] = False
        kw['redirect'] = False
        if 'headers' not in kw:
            kw['headers'] = self.headers

        # With a proxy configured, plain-HTTP requests carry the full URL;
        # everything else sends only the request-uri.
        if self.proxy is not None and u.scheme == "http":
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        # RFC 7231, Section 6.4.4
        if response.status == 303:
            method = 'GET'

        retries = kw.get('retries')
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                raise
            return response

        kw['retries'] = retries
        kw['redirect'] = redirect

        # NOTE(review): kw['headers'] is forwarded unchanged to the redirect
        # target, even when it is on a different host -- confirm that
        # forwarding credential-bearing headers is acceptable.

        # Pass the values as logging arguments so the message is only
        # formatted when the record is actually emitted.
        log.info("Redirecting %s -> %s", url, redirect_location)
        return self.urlopen(method, redirect_location, **kw)

 

 

class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    Example::

        >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
        >>> r1 = proxy.request('GET', 'http://google.com/')
        >>> r2 = proxy.request('GET', 'http://httpbin.org/')
        >>> len(proxy.pools)
        1
        >>> r3 = proxy.request('GET', 'https://httpbin.org/')
        >>> r4 = proxy.request('GET', 'https://twitter.com/')
        >>> len(proxy.pools)
        3

    """

    def __init__(self, proxy_url, num_pools=10, headers=None,
                 proxy_headers=None, **connection_pool_kw):

        # An existing pool may be given in place of a URL; rebuild the URL
        # from its scheme, host and port.
        if isinstance(proxy_url, HTTPConnectionPool):
            proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host,
                                        proxy_url.port)
        proxy = parse_url(proxy_url)
        if not proxy.port:
            # Fill in the scheme's default port (80 if the scheme is unknown).
            port = port_by_scheme.get(proxy.scheme, 80)
            proxy = proxy._replace(port=port)

        # Raise explicitly instead of using ``assert`` so the check still
        # runs under ``python -O``; AssertionError is kept so existing
        # callers that catch it continue to work.
        if proxy.scheme not in ("http", "https"):
            raise AssertionError(
                'Not supported proxy scheme %s' % proxy.scheme)

        self.proxy = proxy
        self.proxy_headers = proxy_headers or {}

        # Every pool created by this manager receives the proxy settings.
        connection_pool_kw['_proxy'] = self.proxy
        connection_pool_kw['_proxy_headers'] = self.proxy_headers

        super(ProxyManager, self).__init__(
            num_pools, headers, **connection_pool_kw)

    def connection_from_host(self, host, port=None, scheme='http'):
        """
        Get a pool for the target host when ``scheme`` is ``"https"``;
        for any other scheme, return the pool for the proxy itself.
        """
        if scheme == "https":
            return super(ProxyManager, self).connection_from_host(
                host, port, scheme)

        return super(ProxyManager, self).connection_from_host(
            self.proxy.host, self.proxy.port, self.proxy.scheme)

    def _set_proxy_headers(self, url, headers=None):
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.

        Returns a new dictionary; ``headers`` itself is not modified.
        """
        headers_ = {'Accept': '*/*'}

        netloc = parse_url(url).netloc
        if netloc:
            headers_['Host'] = netloc

        # User-supplied headers win over the defaults set above.
        if headers:
            headers_.update(headers)
        return headers_

    def urlopen(self, method, url, redirect=True, **kw):
        """Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."""
        u = parse_url(url)

        if u.scheme == "http":
            # For proxied HTTPS requests, httplib sets the necessary headers
            # on the CONNECT to the proxy. For HTTP, we'll definitely
            # need to set 'Host' at the very least.
            headers = kw.get('headers', self.headers)
            kw['headers'] = self._set_proxy_headers(url, headers)

        return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)

 

 

def proxy_from_url(url, **kw):
    """
    Build a :class:`ProxyManager` that uses ``url`` as its proxy URL.

    Any extra keyword arguments are passed through to the
    :class:`ProxyManager` constructor unchanged.
    """
    manager = ProxyManager(proxy_url=url, **kw)
    return manager