WvStreams
wvurl.cc
1/*
2 * Worldvisions Weaver Software:
3 * Copyright (C) 1997-2002 Net Integration Technologies, Inc.
4 *
5 * WvUrl is a simple URL-parsing class with built-in (though still somewhat
6 * inconvenient) DNS resolution.
7 *
8 * See wvurl.h.
9 */
10#include "wvurl.h"
11#include "strutils.h"
12
// One row of the static protocol table: the protocol name as it appears in
// front of the colon in a URL, the port used when the URL names none, and
// whether the colon is followed by "//" (as in http://) or not (as in sip:).
struct DefaultPort
{
    const char *proto;      // protocol name; NULL marks the end of the table
    int port;               // default port for this protocol
    bool uses_slashes;      // true if URLs look like "proto://...", false for "proto:..."
};
20
21// The protocols must be arranged from longest to shortest because they're
22// compared with strncmp, so "https://" will also match http.
23static DefaultPort portmap[] = {
24 { "exchangeits", 7070, false },
25 { "exchangeit", 6969, false },
26 { "https", 443, true },
27 { "http", 80, true },
28 { "file", 0, true },
29 { "sip", 5060, false },
30 { "ftp", 21, true },
31 { "ldaps", 636, false },
32 { "ldap", 389, false },
33 { NULL, 0 }
34};
35
36// Look up the protocol and return the default port.
37static int get_default_port(WvString proto)
38{
39 DefaultPort *p = portmap;
40 for (p = portmap; p->proto != NULL; p++)
41 {
42 if (strncmp(p->proto, proto, strlen(p->proto)) == 0)
43 return p->port;
44 }
45 return -1;
46}
47
48// Look up the protocol and decide whether it uses slashes (http) or not (sip)
49// A check of rfc2396 shows that the URI standard actually distinguishes
50// these: 'hierarchical' vs. 'opaque'.
51static bool protocol_uses_slashes(WvString proto)
52{
53 DefaultPort *p = portmap;
54 for (p = portmap; p->proto != NULL; p++)
55 {
56 if (strncmp(p->proto, proto, strlen(p->proto)) == 0)
57 return p->uses_slashes;
58 }
59 return false;
60}
61
62// Split up the URL into a hostname, a port, and the rest of it.
63WvUrl::WvUrl(WvStringParm url) : err("No error")
64{
65 WvString work(url);
66 char *cptr, *wptr = work.edit();
67
68 port = 0; // error condition by default
69 addr = NULL;
70 resolving = true;
71
72 // deal with extra whitespace.
73 wptr = trim_string(wptr);
74 cptr = wptr + strcspn(wptr, " \t\r\n");
75 *cptr = 0;
76
77 // if it's not one of these easy prefixes, give up. Our URL parser is
78 // pretty dumb.
79 if (get_default_port(wptr) < 0)
80 {
81 err = "WvUrl cannot handle the given protocol.";
82 return;
83 }
84
85 cptr = strchr(wptr, ':');
86 if (!cptr)
87 {
88 err = "No colon after the protocol.";
89 return;
90 }
91 *cptr = 0;
92 proto = wptr;
93
94 bool use_slashes = protocol_uses_slashes(proto);
95 wptr = cptr + (use_slashes ? 3 : 1);
96
97 cptr = strchr(wptr, '@');
98 if (!cptr) // no user given
99 {
100 user = "";
101 password = "";
102 }
103 else
104 {
105 *cptr = 0;
106 char *cptr2 = strchr(wptr, ':');
107 if (cptr2 && (*(cptr2+1) != 0))
108 {
109 *cptr2 = 0;
110 password = cptr2 + 1;
111 }
112 else
113 password = "";
114 user = wptr;
115 wptr = cptr + 1;
116 }
117
118 cptr = strchr(wptr, '/');
119 if (!cptr) // no path given
120 file = use_slashes ? "/" : "";
121 else
122 {
123 file = cptr;
124 *cptr = 0;
125 }
126
127 cptr = strchr(wptr, ':');
128 if (!cptr)
129 port = get_default_port(proto);
130 else
131 {
132 port = atoi(cptr+1);
133 *cptr = 0;
134 }
135
136 hostname = wptr;
137
138 resolve();
139}
140
141
142WvUrl::WvUrl(const WvUrl &url) : err("No error")
143{
144 addr = NULL;
145 resolving = true;
146
147 proto = url.proto;
148 user = url.user;
149 password = url.password;
150 hostname = url.hostname;
151 file = url.file;
152 port = url.port;
153
154 resolve();
155}
156
157
158WvUrl::~WvUrl()
159{
160 if (addr) delete addr;
161}
162
163
164bool WvUrl::resolve()
165{
166 const WvIPAddr *ip;
167 int numaddrs;
168
169 numaddrs = dns.findaddr(0, hostname, &ip);
170 if (!numaddrs) // error condition
171 {
172 err = WvString("Host '%s' could not be found.", hostname);
173 resolving = false;
174 return false;
175 }
176 else if (numaddrs < 0) // still waiting
177 {
178 resolving = true;
179 return false;
180 }
181 else // got at least one address
182 {
183 resolving = false;
184 if (addr) delete addr;
185 addr = new WvIPPortAddr(*ip, port);
186 return true;
187 }
188}
189
190
191// Print out the URL, using the port name (if it's not 80), and either the
192// hostname (if we know it) or the address (if we know that instead.)
193WvUrl::operator WvString () const
194{
195 if (!isok())
196 return WvString("(Invalid URL: %s)", err);
197
198 WvString protostr;
199 if (protocol_uses_slashes(proto))
200 protostr = WvString("%s://", proto);
201 else
202 protostr = WvString("%s:", proto);
203 WvString userstr("");
204 if (user && user.len() != 0)
205 {
206 userstr = WvString("%s", user);
207 if (password && password.len() != 0)
208 userstr.append(WvString(":%s@", password));
209 else
210 userstr.append("@");
211 }
212 WvString portstr("");
213 if (port && port != get_default_port(proto))
214 portstr = WvString(":%s", port);
215 if (hostname)
216 return WvString("%s%s%s%s%s", protostr, userstr, hostname, portstr, file);
217 else if (addr)
218 return WvString("%s%s%s%s%s", protostr, userstr, *addr, portstr, file);
219 else
220 {
221 assert(0);
222 return WvString("(Invalid URL)");
223 }
224}
225
226
A WvFastString acts exactly like a WvString, but can take (const char *) strings without needing to a...
An IP address is made up of a "dotted quad" – four decimal numbers in the form www....
An IP+Port address also includes a port number, with the resulting form www.xxx.yyy....
int findaddr(int msec_timeout, WvStringParm name, WvIPAddr const **addr, WvIPAddrList *addrlist=NULL)
Return -1 on timeout, or the number of addresses found, which may be 0 if the address does not exist.
WvString is an implementation of a simple and efficient printable-string class.
WvString hostname()
Do gethostname() without a fixed-length buffer.
Definition strutils.cc:870
char * trim_string(char *string)
Trims whitespace from the beginning and end of the character string, including carriage return / line...
Definition strutils.cc:59