1
2
3 """
4 URL class
5
6 @copyright: 2006 Thomas Waldmann
7 @license: GNU GPL, see COPYING for details
8 """
9 import posixpath, urllib, urlparse, cgi
10
class URL(object):
    """ represents a Uniform Resource Locator

    A URL object keeps the url string and its components (scheme, server,
    host, port, path, query, querydict, fragment) in sync:

    * setting `url` stores the string "as is" and forgets all components,
      they get parsed again (lazily) when one of them is read
    * setting a component forgets the url string, it gets joined again
      (lazily) from the components when `url` is read

    All strings are handled as byte strings (str), unicode objects given to
    the constructor get encoded using `url_coding`.
    """
    url_coding = 'utf-8'
    default_ports = {'ftp': 21, 'http': 80, 'https': 443, }

    def __init__(self, url=None, **kw):
        """ create URL object

        If url is given, it can be:
        * str (will be parsed and also stored "as is")
        * unicode (will be encoded, then: see str)
        * another URL object (its components get copied, the other object
          is never modified by what we do here)

        If url is given, it will set the defaults. Those defaults
        can be modified by specifying a different value as kw argument.

        If url is not given, you can also give all pieces needed to
        make one by kw arguments:
        * scheme
        * server
        * alternatively: host and maybe port
        * path
        * fragment (e.g. "anchorid")
        * and named query args (a value of None removes that query arg)

        Raises AttributeError if url is of an unsupported type.
        """
        self._url = None
        self._scheme = 'http'
        self._server = ''
        self._host = ''
        self._port = None
        self._path = ''
        self._query = ''
        self._querydict = {}
        self._fragment = ''
        if isinstance(url, URL):
            self._scheme = url.scheme
            self._server = url.server
            # host and port have to be copied, too - otherwise a later
            # host / port update would rebuild server from empty values
            self._host = url.host
            self._port = url.port
            self._path = url.path
            self._query = url.query
            # use a copy, so that updating us never changes the other URL
            self._querydict = dict(url.querydict)
            self._fragment = url.fragment
        elif isinstance(url, str):
            self.url = url
        elif isinstance(url, unicode):
            self.url = url.encode(self.url_coding)
        elif url is None:
            pass
        else:
            raise AttributeError("unexpected argument type for url_default")

        if kw:
            self._update(**kw)

    def _update(self, **kw):
        """ update what we have from kw, care for unicode encoding

        The keys scheme, server, host, port, path and fragment set the
        component of that name, every other key is a query arg. A query
        arg with value None gets removed from the query.
        """
        for key in ('scheme', 'server', 'host', 'port', 'path', 'fragment',):
            if key in kw:
                value = kw.pop(key)
                if isinstance(value, unicode):
                    value = value.encode(self.url_coding)
                getattr(self, '_set_%s' % key)(value)

        # work on a copy, the dict we have might be shared with somebody else
        qd = dict(self.querydict)
        for key, value in kw.items():
            if value is None:
                qd.pop(key, None)
            elif isinstance(value, unicode):
                qd[key] = value.encode(self.url_coding)
            else:
                qd[key] = str(value)
        self.querydict = qd

    def _default_port(self, scheme):
        """ return the well-known port for scheme (None if we know none) """
        return self.default_ports.get(scheme)

    def _parse_server(self, server):
        """ split a server string into (host, port)

        port is an int, or None if server does not contain a usable port.
        A colon inside a bracketed IPv6 literal (like "[::1]") is not taken
        as the port separator.
        """
        host, port = server, None
        colon = server.rfind(':')
        if colon > server.rfind(']'):
            host = server[:colon]
            try:
                port = int(server[colon + 1:])
            except ValueError:
                pass
        return host, port

    def _split(self, url):
        """ wrapper around urlparse.urlsplit, also computes host and port """
        self._scheme, self._server, self._path, self._query, self._fragment = urlparse.urlsplit(url)
        self._host, port = self._parse_server(self._server)
        if port is None:
            port = self._default_port(self._scheme)
        self._port = port

    def __str__(self):
        return self.url

    def __repr__(self):
        return '<URL "%s">' % self

    def _join_host_port(self):
        """ join host and port components to make a server string

        The port is left out if it is unknown or the default port of the
        current scheme.
        """
        port = self._port
        if port is None or port == self._default_port(self._scheme):
            self._server = self._host
        else:
            self._server = "%s:%d" % (self._host, port)

    def _component(self, name):
        """ return attribute `name`, parse the url string first if needed """
        if not hasattr(self, name):
            self._split(self._url)
        return getattr(self, name)

    def _discard(self, name):
        """ forget attribute `name`, it is no problem if it is not set """
        self.__dict__.pop(name, None)

    # Note for all setters below: they read the current value via the
    # property (not via the underscore attribute), because this makes sure
    # a not yet parsed url string gets split up before it is forgotten.

    def _get_scheme(self):
        return self._component('_scheme')
    def _set_scheme(self, v):
        if v != self.scheme:
            self._scheme = v
            # a port that is not spelled out in the server string is the
            # default port of the scheme, so it has to follow the scheme
            if self._parse_server(self._server)[1] is None:
                self._port = self._default_port(v)
            del self.url
    def _del_scheme(self):
        self._discard('_scheme')

    def _get_server(self):
        return self._component('_server')
    def _set_server(self, v):
        if v != self.server:
            self._server = v
            # derive host and port right now - we can't do that lazily,
            # as the url string to parse them from is forgotten below
            self._host, port = self._parse_server(v)
            if port is None:
                port = self._default_port(self._scheme)
            self._port = port
            del self.url
    def _del_server(self):
        self._discard('_server')

    def _get_host(self):
        return self._component('_host')
    def _set_host(self, v):
        if v != self.host:
            self._host = v
            self._join_host_port()
            del self.url
    def _del_host(self):
        self._discard('_host')

    def _get_port(self):
        return self._component('_port')
    def _set_port(self, v):
        if v is not None:
            v = int(v) # also accept a port number given as a string
        if v != self.port:
            self._port = v
            self._join_host_port()
            del self.url
    def _del_port(self):
        self._discard('_port')

    def _get_path(self):
        return self._component('_path')
    def _set_path(self, v):
        if v != self.path:
            self._path = v
            del self.url
    def _del_path(self):
        self._discard('_path')

    def _get_query(self):
        return self._component('_query')
    def _set_query(self, v):
        if v != self.query:
            self._query = v
            del self.querydict
            del self.url
    def _del_query(self):
        self._discard('_query')

    def _get_querydict(self):
        if not hasattr(self, '_querydict'):
            self._querydict = cgi.parse_qs(self.query)
        return self._querydict
    def _set_querydict(self, v):
        self._get_query() # make sure the url string is parsed, see above
        self._querydict = v
        # doseq is needed because cgi.parse_qs gives us lists as values
        self._query = urllib.urlencode(v, True)
        del self.url
    def _del_querydict(self):
        self._discard('_querydict')

    def _get_fragment(self):
        return self._component('_fragment')
    def _set_fragment(self, v):
        if v != self.fragment:
            self._fragment = v
            del self.url
    def _del_fragment(self):
        self._discard('_fragment')

    def _get_url(self):
        if getattr(self, '_url', None) is None:
            self._url = urlparse.urlunsplit((self.scheme, self.server, self.path, self.query, self.fragment))
        return self._url
    def _set_url(self, v):
        # _url is not there if a component was set since it was last read
        if v != getattr(self, '_url', None):
            self._url = v
            del self.scheme
            del self.host
            del self.port
            del self.server
            del self.path
            del self.query
            del self.querydict
            del self.fragment
    def _del_url(self):
        self._discard('_url')

    def _get_local(self):
        """ url without scheme and server, always made from current values """
        return urlparse.urlunsplit(('', '', self.path, self.query, self.fragment))

    scheme = property(_get_scheme, _set_scheme, _del_scheme)
    server = property(_get_server, _set_server, _del_server)
    host = property(_get_host, _set_host, _del_host)
    port = property(_get_port, _set_port, _del_port)
    path = property(_get_path, _set_path, _del_path)
    query = property(_get_query, _set_query, _del_query)
    querydict = property(_get_querydict, _set_querydict, _del_querydict)
    fragment = property(_get_fragment, _set_fragment, _del_fragment)
    url = property(_get_url, _set_url, _del_url)
    local = property(_get_local)

if __name__ == '__main__':
    def _args(url):
        """ sorted query args of url, so we do not depend on dict ordering """
        args = url.query.split('&')
        args.sort()
        return args

    curr = URL('http://www.google.de/asdfasdf/asdfasdf?asdfasdf=adsfsadf/')
    assert str(curr) == 'http://www.google.de/asdfasdf/asdfasdf?asdfasdf=adsfsadf/'
    assert curr.scheme == 'http'
    assert curr.server == 'www.google.de'
    assert (curr.host, curr.port) == ('www.google.de', 80)
    assert curr.path == '/asdfasdf/asdfasdf'
    assert curr.query == 'asdfasdf=adsfsadf/'
    assert curr.fragment == ''
    print("splitting assertions successful")

    curr = URL(scheme='http', server='wikiwikiweb.de', path='/FrontPage', action="diff", rev1=2, rev2=3, fragment='line-123')
    # host and port must be there without rendering the url first
    assert (curr.host, curr.port) == ('wikiwikiweb.de', 80)
    assert str(curr).startswith('http://wikiwikiweb.de/FrontPage?')
    assert str(curr).endswith('#line-123')
    assert _args(curr) == ['action=diff', 'rev1=2', 'rev2=3']
    print("joining assertions successful")

    curr = URL("http://wikiwikiweb.de/FrontPage")
    u = URL(curr, scheme="https")
    assert str(u) == 'https://wikiwikiweb.de/FrontPage'
    u = URL(curr, action="raw", rev=42)
    assert str(u).startswith('http://wikiwikiweb.de/FrontPage?')
    assert _args(u) == ['action=raw', 'rev=42']
    # making a new URL from curr must not change curr
    assert str(curr) == 'http://wikiwikiweb.de/FrontPage'
    assert curr.querydict == {}
    u = URL(u, action="AttachFile", do="get", file=u"\xdcbler D\xfcbel.doc")
    assert _args(u) == ['action=AttachFile', 'do=get', 'file=%C3%9Cbler+D%C3%BCbel.doc', 'rev=42']
    u = URL(u, action=None, do=None, rev=None, file=None)
    assert str(u) == 'http://wikiwikiweb.de/FrontPage'
    print("updating assertions successful")

    u = URL(curr, port=42, path="/wiki/img/moinmoin.png")
    print(u)
    assert str(u) == 'http://wikiwikiweb.de:42/wiki/img/moinmoin.png'