Skip to content

Commit 46e1640

Browse files
committed
url: use the URL API internally as well
... to make it a truly unified URL parser. Closes #3017
1 parent f078361 commit 46e1640

File tree

22 files changed

+376
-918
lines changed

22 files changed

+376
-918
lines changed

lib/curl_path.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* | (__| |_| | _ <| |___
66
* \___|\___/|_| \_\_____|
77
*
8-
* Copyright (C) 1998 - 2017, Daniel Stenberg, <[email protected]>, et al.
8+
* Copyright (C) 1998 - 2018, Daniel Stenberg, <[email protected]>, et al.
99
*
1010
* This software is licensed as described in the file COPYING, which
1111
* you should have received as part of this distribution. The terms
@@ -39,7 +39,7 @@ CURLcode Curl_getworkingpath(struct connectdata *conn,
3939
char *working_path;
4040
size_t working_path_len;
4141
CURLcode result =
42-
Curl_urldecode(data, data->state.path, 0, &working_path,
42+
Curl_urldecode(data, data->state.up.path, 0, &working_path,
4343
&working_path_len, FALSE);
4444
if(result)
4545
return result;

lib/dict.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ static CURLcode dict_do(struct connectdata *conn, bool *done)
136136
struct Curl_easy *data = conn->data;
137137
curl_socket_t sockfd = conn->sock[FIRSTSOCKET];
138138

139-
char *path = data->state.path;
139+
char *path = data->state.up.path;
140140
curl_off_t *bytecount = &data->req.bytecount;
141141

142142
*done = TRUE; /* unconditionally */

lib/easy.c

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,10 +1002,6 @@ struct Curl_easy *curl_easy_duphandle(struct Curl_easy *data)
10021002
*/
10031003
void curl_easy_reset(struct Curl_easy *data)
10041004
{
1005-
Curl_safefree(data->state.pathbuffer);
1006-
1007-
data->state.path = NULL;
1008-
10091005
Curl_free_request_state(data);
10101006

10111007
/* zero out UserDefined data: */

lib/file.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ static CURLcode file_connect(struct connectdata *conn, bool *done)
143143
#endif
144144
size_t real_path_len;
145145

146-
CURLcode result = Curl_urldecode(data, data->state.path, 0, &real_path,
146+
CURLcode result = Curl_urldecode(data, data->state.up.path, 0, &real_path,
147147
&real_path_len, FALSE);
148148
if(result)
149149
return result;
@@ -197,7 +197,7 @@ static CURLcode file_connect(struct connectdata *conn, bool *done)
197197

198198
file->fd = fd;
199199
if(!data->set.upload && (fd == -1)) {
200-
failf(data, "Couldn't open file %s", data->state.path);
200+
failf(data, "Couldn't open file %s", data->state.up.path);
201201
file_done(conn, CURLE_FILE_COULDNT_READ_FILE, FALSE);
202202
return CURLE_FILE_COULDNT_READ_FILE;
203203
}

lib/ftp.c

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1444,6 +1444,7 @@ static CURLcode ftp_state_list(struct connectdata *conn)
14441444
{
14451445
CURLcode result = CURLE_OK;
14461446
struct Curl_easy *data = conn->data;
1447+
struct FTP *ftp = data->req.protop;
14471448

14481449
/* If this output is to be machine-parsed, the NLST command might be better
14491450
to use, since the LIST command output is not specified or standard in any
@@ -1460,7 +1461,7 @@ static CURLcode ftp_state_list(struct connectdata *conn)
14601461
then just do LIST (in that case: nothing to do here)
14611462
*/
14621463
char *cmd, *lstArg, *slashPos;
1463-
const char *inpath = data->state.path;
1464+
const char *inpath = ftp->path;
14641465

14651466
lstArg = NULL;
14661467
if((data->set.ftp_filemethod == FTPFILE_NOCWD) &&
@@ -3141,7 +3142,7 @@ static CURLcode ftp_done(struct connectdata *conn, CURLcode status,
31413142
int ftpcode;
31423143
CURLcode result = CURLE_OK;
31433144
char *path = NULL;
3144-
const char *path_to_use = data->state.path;
3145+
const char *path_to_use = ftp->path;
31453146

31463147
if(!ftp)
31473148
return CURLE_OK;
@@ -3346,7 +3347,7 @@ static CURLcode ftp_done(struct connectdata *conn, CURLcode status,
33463347
/* Send any post-transfer QUOTE strings? */
33473348
if(!status && !result && !premature && data->set.postquote)
33483349
result = ftp_sendquote(conn, data->set.postquote);
3349-
3350+
Curl_safefree(ftp->pathalloc);
33503351
return result;
33513352
}
33523353

@@ -3695,12 +3696,13 @@ static void wc_data_dtor(void *ptr)
36953696
static CURLcode init_wc_data(struct connectdata *conn)
36963697
{
36973698
char *last_slash;
3698-
char *path = conn->data->state.path;
3699+
struct FTP *ftp = conn->data->req.protop;
3700+
char *path = ftp->path;
36993701
struct WildcardData *wildcard = &(conn->data->wildcard);
37003702
CURLcode result = CURLE_OK;
37013703
struct ftp_wc *ftpwc = NULL;
37023704

3703-
last_slash = strrchr(conn->data->state.path, '/');
3705+
last_slash = strrchr(ftp->path, '/');
37043706
if(last_slash) {
37053707
last_slash++;
37063708
if(last_slash[0] == '\0') {
@@ -3757,7 +3759,7 @@ static CURLcode init_wc_data(struct connectdata *conn)
37573759
goto fail;
37583760
}
37593761

3760-
wildcard->path = strdup(conn->data->state.path);
3762+
wildcard->path = strdup(ftp->path);
37613763
if(!wildcard->path) {
37623764
result = CURLE_OUT_OF_MEMORY;
37633765
goto fail;
@@ -3828,16 +3830,15 @@ static CURLcode wc_statemach(struct connectdata *conn)
38283830
/* filelist has at least one file, let's get the first one */
38293831
struct ftp_conn *ftpc = &conn->proto.ftpc;
38303832
struct curl_fileinfo *finfo = wildcard->filelist.head->ptr;
3833+
struct FTP *ftp = conn->data->req.protop;
38313834

38323835
char *tmp_path = aprintf("%s%s", wildcard->path, finfo->filename);
38333836
if(!tmp_path)
38343837
return CURLE_OUT_OF_MEMORY;
38353838

3836-
/* switch default "state.pathbuffer" and tmp_path, good to see
3837-
ftp_parse_url_path function to understand this trick */
3838-
Curl_safefree(conn->data->state.pathbuffer);
3839-
conn->data->state.pathbuffer = tmp_path;
3840-
conn->data->state.path = tmp_path;
3839+
/* switch default ftp->path and tmp_path */
3840+
free(ftp->pathalloc);
3841+
ftp->pathalloc = ftp->path = tmp_path;
38413842

38423843
infof(conn->data, "Wildcard - START of \"%s\"\n", finfo->filename);
38433844
if(conn->data->set.chunk_bgn) {
@@ -4105,7 +4106,7 @@ CURLcode ftp_parse_url_path(struct connectdata *conn)
41054106
struct FTP *ftp = data->req.protop;
41064107
struct ftp_conn *ftpc = &conn->proto.ftpc;
41074108
const char *slash_pos; /* position of the first '/' char in curpos */
4108-
const char *path_to_use = data->state.path;
4109+
const char *path_to_use = ftp->path;
41094110
const char *cur_pos;
41104111
const char *filename = NULL;
41114112

@@ -4191,7 +4192,7 @@ CURLcode ftp_parse_url_path(struct connectdata *conn)
41914192
/* parse the URL path into separate path components */
41924193
while((slash_pos = strchr(cur_pos, '/')) != NULL) {
41934194
/* 1 or 0 pointer offset to indicate absolute directory */
4194-
ssize_t absolute_dir = ((cur_pos - data->state.path > 0) &&
4195+
ssize_t absolute_dir = ((cur_pos - ftp->path > 0) &&
41954196
(ftpc->dirdepth == 0))?1:0;
41964197

41974198
/* seek out the next path component */
@@ -4268,7 +4269,7 @@ CURLcode ftp_parse_url_path(struct connectdata *conn)
42684269
size_t dlen;
42694270
char *path;
42704271
CURLcode result =
4271-
Curl_urldecode(conn->data, data->state.path, 0, &path, &dlen, TRUE);
4272+
Curl_urldecode(conn->data, ftp->path, 0, &path, &dlen, TRUE);
42724273
if(result) {
42734274
freedirs(ftpc);
42744275
return result;
@@ -4388,16 +4389,16 @@ static CURLcode ftp_setup_connection(struct connectdata *conn)
43884389
char *type;
43894390
struct FTP *ftp;
43904391

4391-
conn->data->req.protop = ftp = malloc(sizeof(struct FTP));
4392+
conn->data->req.protop = ftp = calloc(sizeof(struct FTP), 1);
43924393
if(NULL == ftp)
43934394
return CURLE_OUT_OF_MEMORY;
43944395

4395-
data->state.path++; /* don't include the initial slash */
4396+
ftp->path = &data->state.up.path[1]; /* don't include the initial slash */
43964397
data->state.slash_removed = TRUE; /* we've skipped the slash */
43974398

43984399
/* FTP URLs support an extension like ";type=<typecode>" that
43994400
* we'll try to get now! */
4400-
type = strstr(data->state.path, ";type=");
4401+
type = strstr(ftp->path, ";type=");
44014402

44024403
if(!type)
44034404
type = strstr(conn->host.rawalloc, ";type=");

lib/ftp.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@ struct FTP {
105105
curl_off_t *bytecountp;
106106
char *user; /* user name string */
107107
char *passwd; /* password string */
108+
char *path; /* points to the urlpieces struct field */
109+
char *pathalloc; /* if non-NULL a pointer to an allocated path */
108110

109111
/* transfer a file/body or not, done as a typedefed enum just to make
110112
debuggers display the full symbol and not just the numerical value */

lib/gopher.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ static CURLcode gopher_do(struct connectdata *conn, bool *done)
7878
curl_socket_t sockfd = conn->sock[FIRSTSOCKET];
7979

8080
curl_off_t *bytecount = &data->req.bytecount;
81-
char *path = data->state.path;
81+
char *path = data->state.up.path;
8282
char *sel = NULL;
8383
char *sel_org = NULL;
8484
ssize_t amount, k;

lib/http.c

Lines changed: 76 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1877,7 +1877,8 @@ CURLcode Curl_http(struct connectdata *conn, bool *done)
18771877
struct Curl_easy *data = conn->data;
18781878
CURLcode result = CURLE_OK;
18791879
struct HTTP *http;
1880-
const char *ppath = data->state.path;
1880+
const char *path = data->state.up.path;
1881+
const char *query = data->state.up.query;
18811882
bool paste_ftp_userpwd = FALSE;
18821883
char ftp_typecode[sizeof("/;type=?")] = "";
18831884
const char *host = conn->host.name;
@@ -1995,7 +1996,7 @@ CURLcode Curl_http(struct connectdata *conn, bool *done)
19951996
}
19961997

19971998
/* setup the authentication headers */
1998-
result = Curl_http_output_auth(conn, request, ppath, FALSE);
1999+
result = Curl_http_output_auth(conn, request, path, FALSE);
19992000
if(result)
20002001
return result;
20012002

@@ -2223,47 +2224,59 @@ CURLcode Curl_http(struct connectdata *conn, bool *done)
22232224
/* The path sent to the proxy is in fact the entire URL. But if the remote
22242225
host is an IDN name, we must make sure that the request we produce only
22252226
uses the encoded host name! */
2227+
2228+
/* and no fragment part */
2229+
CURLUcode uc;
2230+
char *url;
2231+
CURLU *h = curl_url_dup(data->state.uh);
2232+
if(!h)
2233+
return CURLE_OUT_OF_MEMORY;
2234+
22262235
if(conn->host.dispname != conn->host.name) {
2227-
char *url = data->change.url;
2228-
ptr = strstr(url, conn->host.dispname);
2229-
if(ptr) {
2230-
/* This is where the display name starts in the URL, now replace this
2231-
part with the encoded name. TODO: This method of replacing the host
2232-
name is rather crude as I believe there's a slight risk that the
2233-
user has entered a user name or password that contain the host name
2234-
string. */
2235-
size_t currlen = strlen(conn->host.dispname);
2236-
size_t newlen = strlen(conn->host.name);
2237-
size_t urllen = strlen(url);
2238-
2239-
char *newurl;
2240-
2241-
newurl = malloc(urllen + newlen - currlen + 1);
2242-
if(newurl) {
2243-
/* copy the part before the host name */
2244-
memcpy(newurl, url, ptr - url);
2245-
/* append the new host name instead of the old */
2246-
memcpy(newurl + (ptr - url), conn->host.name, newlen);
2247-
/* append the piece after the host name */
2248-
memcpy(newurl + newlen + (ptr - url),
2249-
ptr + currlen, /* copy the trailing zero byte too */
2250-
urllen - (ptr-url) - currlen + 1);
2251-
if(data->change.url_alloc) {
2252-
Curl_safefree(data->change.url);
2253-
data->change.url_alloc = FALSE;
2254-
}
2255-
data->change.url = newurl;
2256-
data->change.url_alloc = TRUE;
2257-
}
2258-
else
2259-
return CURLE_OUT_OF_MEMORY;
2236+
uc = curl_url_set(h, CURLUPART_HOST, conn->host.name, 0);
2237+
if(uc) {
2238+
curl_url_cleanup(h);
2239+
return CURLE_OUT_OF_MEMORY;
22602240
}
22612241
}
2262-
ppath = data->change.url;
2263-
if(checkprefix("ftp://", ppath)) {
2242+
uc = curl_url_set(h, CURLUPART_FRAGMENT, NULL, 0);
2243+
if(uc) {
2244+
curl_url_cleanup(h);
2245+
return CURLE_OUT_OF_MEMORY;
2246+
}
2247+
2248+
if(strcasecompare("http", data->state.up.scheme)) {
2249+
/* when getting HTTP, we don't want the userinfo in the URL */
2250+
uc = curl_url_set(h, CURLUPART_USER, NULL, 0);
2251+
if(uc) {
2252+
curl_url_cleanup(h);
2253+
return CURLE_OUT_OF_MEMORY;
2254+
}
2255+
uc = curl_url_set(h, CURLUPART_PASSWORD, NULL, 0);
2256+
if(uc) {
2257+
curl_url_cleanup(h);
2258+
return CURLE_OUT_OF_MEMORY;
2259+
}
2260+
}
2261+
/* now extract the new version of the URL */
2262+
uc = curl_url_get(h, CURLUPART_URL, &url, 0);
2263+
if(uc) {
2264+
curl_url_cleanup(h);
2265+
return CURLE_OUT_OF_MEMORY;
2266+
}
2267+
2268+
if(data->change.url_alloc)
2269+
free(data->change.url);
2270+
2271+
data->change.url = url;
2272+
data->change.url_alloc = TRUE;
2273+
2274+
curl_url_cleanup(h);
2275+
2276+
if(strcasecompare("ftp", data->state.up.scheme)) {
22642277
if(data->set.proxy_transfer_mode) {
22652278
/* when doing ftp, append ;type=<a|i> if not present */
2266-
char *type = strstr(ppath, ";type=");
2279+
char *type = strstr(path, ";type=");
22672280
if(type && type[6] && type[7] == 0) {
22682281
switch(Curl_raw_toupper(type[6])) {
22692282
case 'A':
@@ -2278,7 +2291,7 @@ CURLcode Curl_http(struct connectdata *conn, bool *done)
22782291
char *p = ftp_typecode;
22792292
/* avoid sending invalid URLs like ftp://example.com;type=i if the
22802293
* user specified ftp://example.com without the slash */
2281-
if(!*data->state.path && ppath[strlen(ppath) - 1] != '/') {
2294+
if(!*data->state.up.path && path[strlen(path) - 1] != '/') {
22822295
*p++ = '/';
22832296
}
22842297
snprintf(p, sizeof(ftp_typecode) - 1, ";type=%c",
@@ -2431,18 +2444,32 @@ CURLcode Curl_http(struct connectdata *conn, bool *done)
24312444
if(result)
24322445
return result;
24332446

2434-
if(data->set.str[STRING_TARGET])
2435-
ppath = data->set.str[STRING_TARGET];
2447+
if(data->set.str[STRING_TARGET]) {
2448+
path = data->set.str[STRING_TARGET];
2449+
query = NULL;
2450+
}
24362451

24372452
/* url */
2438-
if(paste_ftp_userpwd)
2453+
if(conn->bits.httpproxy && !conn->bits.tunnel_proxy) {
2454+
char *url = data->change.url;
2455+
result = Curl_add_buffer(&req_buffer, url, strlen(url));
2456+
if(result)
2457+
return result;
2458+
}
2459+
else if(paste_ftp_userpwd)
24392460
result = Curl_add_bufferf(&req_buffer, "ftp://%s:%s@%s",
24402461
conn->user, conn->passwd,
2441-
ppath + sizeof("ftp://") - 1);
2442-
else
2443-
result = Curl_add_buffer(&req_buffer, ppath, strlen(ppath));
2444-
if(result)
2445-
return result;
2462+
path + sizeof("ftp://") - 1);
2463+
else {
2464+
result = Curl_add_buffer(&req_buffer, path, strlen(path));
2465+
if(result)
2466+
return result;
2467+
if(query) {
2468+
result = Curl_add_bufferf(&req_buffer, "?%s", query);
2469+
if(result)
2470+
return result;
2471+
}
2472+
}
24462473

24472474
result =
24482475
Curl_add_bufferf(&req_buffer,
@@ -2515,7 +2542,7 @@ CURLcode Curl_http(struct connectdata *conn, bool *done)
25152542
co = Curl_cookie_getlist(data->cookies,
25162543
conn->allocptr.cookiehost?
25172544
conn->allocptr.cookiehost:host,
2518-
data->state.path,
2545+
data->state.up.path,
25192546
(conn->handler->protocol&CURLPROTO_HTTPS)?
25202547
TRUE:FALSE);
25212548
Curl_share_unlock(data, CURL_LOCK_DATA_COOKIE);
@@ -3836,7 +3863,7 @@ CURLcode Curl_http_readwrite_headers(struct Curl_easy *data,
38363863
here, or else use real peer host name. */
38373864
conn->allocptr.cookiehost?
38383865
conn->allocptr.cookiehost:conn->host.name,
3839-
data->state.path);
3866+
data->state.up.path);
38403867
Curl_share_unlock(data, CURL_LOCK_DATA_COOKIE);
38413868
}
38423869
#endif

0 commit comments

Comments
 (0)