PDA

View Full Version : Handling HTTP proxies with or without authentication


jannewmarch
August 14th, 2008, 12:49 PM
Hi

I often have to connect through an HTTP proxy, and it requires authentication as well. I need to read my comics at work... So I came up with the following, which uses the GNOME GConf settings (view/change them with gconf-editor). I have tested it with no proxy, a non-authenticating proxy and an authenticating proxy, and it seems to work okay. Platform: Ubuntu 8.04, kernel 2.6.24-19, screenlets 0.0.12


import urllib2
import gconf

# Default GConf client, created once when this code is loaded.
# NOTE(review): urlopen() below reads self.gconf_client rather than this
# name, so this assignment presumably belongs in the enclosing class body
# (making it a class attribute) -- confirm against the screenlet it is
# pasted into.
gconf_client = gconf.client_get_default()

# Equivalent to urllib2.urlopen(url) but uses proxies if specified by GConf
# Call by self.urlopen(url)
def urlopen(self, URL):
    """Open URL like urllib2.urlopen(), honouring the GConf HTTP proxy settings.

    The settings are read through self.gconf_client (keys under
    /system/http_proxy) on every call instead of being cached and tracked
    for changes, because only a few requests are expected.

    The opener is installed globally with urllib2.install_opener(), so
    later plain urllib2.urlopen() calls go through the same handler.

    Args:
        URL: the URL to fetch.

    Returns:
        The response object returned by urllib2.urlopen(URL).

    Raises:
        Exception: if the proxy is enabled but host or port is unset, or if
            proxy authentication is enabled but user or password is unset.
    """
    import urllib  # local import: only needed to quote proxy credentials

    client = self.gconf_client

    if not client.get_bool("/system/http_proxy/use_http_proxy"):
        # An empty mapping tells urllib2 not to use any proxy at all.
        print("Direct connection to " + URL)
        proxy_support = urllib2.ProxyHandler({})
    else:
        # TODO: check our URL isn't in the ignore_hosts list
        # (not very likely though, and too messy).
        proxy_host = client.get_string("/system/http_proxy/host")
        proxy_port = client.get_int("/system/http_proxy/port")
        # An empty host string is as unusable as an unset one.
        if not proxy_host or proxy_port == 0:
            raise Exception("GConf: proxy_host and proxy_port cannot be None")

        if not client.get_bool("/system/http_proxy/use_authentication"):
            print("Using simple proxy")
            proxy_url = "http://%s:%d" % (proxy_host, proxy_port)
        else:
            print("Using proxy with auth")
            auth_password = client.get_string(
                "/system/http_proxy/authentication_password")
            auth_user = client.get_string(
                "/system/http_proxy/authentication_user")
            if auth_user is None or auth_password is None:
                raise Exception("GConf: authentication_user and authentication_password cannot be None")
            # URL form from Andre Bocchini <lists@andrebocchini.com>
            # at http://bytes.com/forum/thread22918.html
            # Percent-encode the credentials: a raw '@', ':' or '/' in the
            # user name or password would otherwise be parsed as part of
            # the URL structure and corrupt the proxy address.
            proxy_url = "http://%s:%s@%s:%d" % (
                urllib.quote(auth_user, safe=''),
                urllib.quote(auth_password, safe=''),
                proxy_host,
                proxy_port,
            )
        proxy_support = urllib2.ProxyHandler({'http': proxy_url})

    opener = urllib2.build_opener(proxy_support, urllib2.HTTPHandler)
    urllib2.install_opener(opener)
    return urllib2.urlopen(URL)


I've used this in ComicStripScreenlet and will be putting this into other screenlets (e.g. ClearWeather) that break behind HTTP proxies.

Note: the code does not handle working through an autoproxy proxy.pac setting ("/system/proxy/mode" set to "auto" and "/system/proxy/autoconfig_url" set). The autoproxy file contains a JavaScript function. There is a Python module at http://code.google.com/p/pacparser that can handle these files, but it requires installing O/S-specific C libraries for the SpiderMonkey JavaScript engine. It isn't present on my system and I don't want to force installation. So if you use autoproxy, for now duplicate the proxy host/port settings explicitly, and this code will ALWAYS go through the proxy.

Cheers

Jan
--

jannewmarch
August 17th, 2008, 03:24 AM
Here is an updated version that will handle autoconfiguration files (proxy.pac) as well:

import urllib2
import gconf
import urlparse
import tempfile
import os

# pacparser is optional: it is only needed to evaluate proxy.pac
# autoconfiguration files. When it is missing, the name is bound to None
# so later code can test for it.
try:
    import pacparser
except ImportError:
    pacparser = None
    print('If you need autoproxy support, download pacparser from code.google.com/p/pacparser')

def urlopen(self, URL):
    """Open URL through the proxy handler chosen by get_proxy_support().

    The handler is looked up on every call. The resulting opener is
    installed globally via urllib2.install_opener() before the request
    is made.

    Returns:
        The response object returned by urllib2.urlopen(URL).
    """
    handler = self.get_proxy_support(URL)
    urllib2.install_opener(
        urllib2.build_opener(handler, urllib2.HTTPHandler)
    )
    return urllib2.urlopen(URL)

def get_proxy_support(self, URL):
    """Return the urllib2.ProxyHandler to use for URL, based on GConf settings.

    Reads /system/http_proxy/* and /system/proxy/* from the default GConf
    client. Uses early returns to keep the logic flat:

    * proxy disabled                      -> handler with no proxies
    * mode 'auto' and pacparser available -> ask the proxy.pac file
    * anything else                       -> manual host/port settings

    Args:
        URL: the URL that is about to be fetched.

    Returns:
        A urllib2.ProxyHandler, with credentials embedded in the proxy URL
        when proxy authentication is enabled.

    Raises:
        Exception: if the proxy is enabled but mode is 'none', the
            autoconfig URL is unset, host/port are unset, or
            authentication is enabled without user/password.
    """
    import urllib  # local import: only needed to quote proxy credentials

    gconf_client = gconf.client_get_default()

    if not gconf_client.get_bool("/system/http_proxy/use_http_proxy"):
        # An empty mapping tells urllib2 not to use any proxy at all.
        print("Direct connection to " + URL)
        return urllib2.ProxyHandler({})

    mode = gconf_client.get_string("/system/proxy/mode")
    if mode == 'none':
        raise Exception("Proxy mode can't be none if proxy requested")

    if mode == 'auto' and pacparser is not None:
        # Autoproxy, and pacparser is installed to evaluate the PAC file.
        print('Getting proxy from autoproxy')

        autoconfig_url = gconf_client.get_string("/system/proxy/autoconfig_url")
        if autoconfig_url is None:
            raise Exception("Autoproxy cannot be null")

        # pacparser needs the autoconfig in a local file :-(
        autoconfig_contents = urllib2.urlopen(autoconfig_url).read()
        pac_file_desc, pac_file = tempfile.mkstemp('.pac')
        try:
            # Wrap the descriptor mkstemp() already opened instead of
            # opening the path a second time, so it does not leak.
            pac_f = os.fdopen(pac_file_desc, 'w')
            try:
                pac_f.write(autoconfig_contents)
            finally:
                pac_f.close()

            # get the host part of the URL http://host:.../...?...
            host = urlparse.urlsplit(URL)[1].split(':')[0]
            print('URL host part ' + host)

            # Now ask about our URL from the local autoproxy file
            possible_proxy = pacparser.just_find_proxy(pac_file,
                                                       'http://' + host,
                                                       host)
        finally:
            # Remove the temp file even if writing or PAC evaluation fails.
            os.remove(pac_file)

        # A PAC result may list several alternatives separated by ';'
        # (e.g. "PROXY a:3128; PROXY b:3128; DIRECT"); use the first one.
        first_choice = possible_proxy.split(';')[0].strip()
        if first_choice == "DIRECT":
            print("Direct connection to " + URL)
            return urllib2.ProxyHandler({})

        proxy_host_port = first_choice.replace('PROXY', '', 1).strip()
        proxy_host, proxy_port = proxy_host_port.split(':')
        proxy_port = int(proxy_port)
    else:
        # Manual mode, autoproxy without pacparser, or an unset/unknown
        # mode: fall back to the explicit host/port settings so that
        # proxy_host and proxy_port are always bound below.
        print('Getting proxy from manual setting')

        # TODO: check our URL isn't in the ignore_hosts list
        # (not very likely though, and too messy).
        proxy_host = gconf_client.get_string("/system/http_proxy/host")
        proxy_port = gconf_client.get_int("/system/http_proxy/port")

    # Validate before printing: concatenating a None host would raise a
    # TypeError and hide the real configuration problem.
    if not proxy_host or proxy_port == 0:
        raise Exception("GConf: proxy_host and proxy_port cannot be None")

    print('proxy host ' + proxy_host)
    print('proxy port ' + str(proxy_port))

    # Now we start looking at authentication...
    if not gconf_client.get_bool("/system/http_proxy/use_authentication"):
        print("Using proxy without authentication")
        return urllib2.ProxyHandler(
            {'http': "http://%s:%d" % (proxy_host, proxy_port)}
        )

    print("Using proxy with authentication")
    auth_password = gconf_client.get_string("/system/http_proxy/authentication_password")
    auth_user = gconf_client.get_string("/system/http_proxy/authentication_user")
    if auth_user is None or auth_password is None:
        raise Exception("GConf: authentication_user and authentication_password cannot be None")

    # URL form from Andre Bocchini <lists@andrebocchini.com>
    # at http://bytes.com/forum/thread22918.html
    # Percent-encode the credentials: a raw '@', ':' or '/' in the user
    # name or password would otherwise be parsed as part of the URL
    # structure and corrupt the proxy address.
    proxy_url = "http://%s:%s@%s:%d" % (
        urllib.quote(auth_user, safe=''),
        urllib.quote(auth_password, safe=''),
        proxy_host,
        proxy_port,
    )
    return urllib2.ProxyHandler({'http': proxy_url})

mrbishop
September 17th, 2008, 08:17 PM
I'm sorry, but that looks like exactly what I need, but how does one go about using that code?

You say "I've used this in ComicStripScreenlet and will be putting this into other screenlets", but how do you "put it in"?

Could you be specific on how I could use this code?