Windows C Programming projects
Socket basics: a routine to go get a web page
This routine grabs a web page or any ASCII file, given either a URL host name or an IP address. It supports both POST and GET. If you have questions about sockets, includes, libraries or CGI POST and GET data, then email me and I'll address them on another page.
It will need a few small support functions.
The first is a quick function to see if a string looks more like an IP address than a URL. Not a full IP validation but good enough for our purposes.
/* **************
int isip()
Quick test for whether a string looks like a dotted IP address rather
than a host name: every character must be a digit or a '.', and there
must be exactly 3 dots.
Not a full validation (e.g. "..." or "999.1.1.1" still pass) -
good enough - connect() will fail if anything else wrong
returns 1 - is ok
        0 - not ok (also returned for a NULL pointer)
*************** */
int isip(char *p) // p = "127.4.5.6" etc
{
    int ndots = 0;

    if (p == NULL)
        return 0;

    for (; *p != '\0'; p++)
    {
        if (*p == '.')
            ndots++;
        // <ctype.h> functions require a value representable as unsigned
        // char; passing a negative plain char is undefined behaviour
        else if (!isdigit((unsigned char) *p))
            return 0;
    }
    return (ndots == 3);
}
This next routine checks to see if we have any data waiting for us to read from our socket.
/* *************************
int so_dataready()
see if socket has any data for us, waiting up to sec seconds plus
usec microseconds for it to become readable
return > 0 - data is ready
      <= 0 - no data within the timeout, or error
************************* */
int so_dataready(SOCKET so, // socket
                 unsigned int sec, // seconds to wait
                 unsigned int usec) // microseconds (no way is it this accurate)
{
    fd_set readfds;
    struct timeval t;

    t.tv_sec = sec;
    t.tv_usec = usec;

    // Build the set with the documented macros (same as mysend() does)
    // instead of writing the fd_set members directly.
    FD_ZERO(&readfds);
    FD_SET(so, &readfds);

    // Only the read set is supplied; the result of select() is returned as is.
    return select(0, &readfds, NULL, NULL, &t);
}
This next function sends the data. It's a little more tolerant than the general send() routine as it allows for partial sends.
/* **************************
int mysend()
a send routine that is more tolerant of partial sends: it keeps
calling send() until all len bytes have gone, waiting up to TimeOut
seconds each time for the socket to become writable
returns Number Bytes Sent (len, or 0 when len <= 0)
        SOCKET_ERROR if the wait times out or fails, or send() fails
**************************** */
int mysend(SOCKET s, // the socket
           const char FAR * buf, // the data to send
           int len, // length of data
           int flags, // flags - passed straight through to send(); set to 0
           int TimeOut // in seconds - 30 seems okay
           )
{
    int done = 0; // bytes handed to send() so far

    while (done < len)
    {
        fd_set writable;
        struct timeval wait;
        int rc;

        // select() may modify its arguments, so rebuild them every pass
        FD_ZERO(&writable);
        FD_SET(s, &writable);
        wait.tv_sec = TimeOut;
        wait.tv_usec = 0;

        // timeout (0) and failure (< 0) are both reported as an error
        if (select(1, NULL, &writable, NULL, &wait) <= 0)
            return SOCKET_ERROR;

        rc = send(s, buf + done, len - done, flags);
        if (rc == SOCKET_ERROR)
            return rc;

        done += rc; // may be a partial send - go round again for the rest
    }
    return done;
}
Here's the main routine.
/* ***************
int httpcommand()
A simple socket routine to go
get a web page or any other ascii web file (javascript etc)
as one item
assumes port 80, sends an HTTP/1.0 request

httpdomain - host name or dotted IP address; also sent as the Host: header
method     - "post" (any case) sends 'data' as the request body, anything
             else is treated as "get". The string is written as given into
             the request line (HTTP method names are case-sensitive, so
             upper case is the safe choice).
action     - what comes after the host, e.g. "/index.html"
data       - body for "post" (NULL is treated as an empty body); unused
             for "get"
resp       - caller-owned buffer for the raw reply (headers + page). It is
             zero-filled first and at most max_len - 2 bytes are stored, so
             it is always NUL terminated. May be NULL, in which case the
             reply is not read.
max_len    - size of the buffer resp points to

returns  0 - the request was sent (whatever was received is in resp)
        -1 - bad arguments, out of memory, name lookup failure, or
             socket() / connect() / send failure
*************** */
int httpcommand(char *httpdomain, // "www.google.co.uk" or "192.168.2.1" etc
                char *method, // "post" or "get"
                char *action, // "/index.html" or whatever comes after the url
                char *data, // For "post", NULL if "get". This is the 'field' data. eg "a=hello&b=world"
                char *resp, // returned page goes here - notice it's one big blob! ie malloc(64000)
                int max_len // size of the buffer resp points to
                )
{
    // Room for the fixed request text (request line, header names, CRLFs,
    // Content-Length digits, terminating NUL) on top of the variable parts.
    enum { REQ_OVERHEAD = 128 };

    // You can pretend to be a browser here - see what difference it makes
    const char *clientver = "Yadda yadda";
    const char *body;
    char *in_buff = NULL; // the outgoing request
    size_t buff_size;
    int req_len;
    int is_post;
    SOCKET so = INVALID_SOCKET;
    struct sockaddr_in sa;
    struct hostent FAR *ph;
    struct linger ling;
    int one = 1;
    int iret = -1; // pessimistic until the request has been sent

    if (httpdomain == NULL || method == NULL || action == NULL)
        return -1;
    if (resp != NULL)
    {
        if (max_len <= 0)
            return -1;
        memset(resp, 0, (size_t) max_len); // set the resp to 0's
    }

    is_post = (stricmp(method, "post") == 0);
    body = (is_post && data != NULL) ? data : "";

    // ---- work out who to talk to -------------------------------------
    memset(&sa, 0, sizeof(sa));
    sa.sin_family = AF_INET;
    sa.sin_port = htons(80); // the default http port

    if (isip(httpdomain)) // if httpdomain is a valid ip address format (3 dots)
    {
        sa.sin_addr.s_addr = inet_addr(httpdomain);
        if (sa.sin_addr.s_addr == INADDR_NONE)
        {
            // Your ErrorRoutine - "inet_addr(%s) failed",httpdomain
            return -1;
        }
    }
    else
    {
        ph = gethostbyname(httpdomain); // find the address associated with the name
        if (ph == NULL || ph->h_addrtype != AF_INET || ph->h_addr == NULL)
        {
            // Your ErrorRoutine - "gethostbyname(%s) failed",httpdomain
            return -1;
        }
        // copy the bytes rather than reading through a cast pointer
        memcpy(&sa.sin_addr, ph->h_addr, sizeof(sa.sin_addr));
    }

    // ---- build the request in a buffer sized to fit it ---------------
    buff_size = strlen(method) + strlen(action) + strlen(clientver)
              + strlen(httpdomain) + strlen(body) + REQ_OVERHEAD;
    in_buff = calloc(1, buff_size);
    if (in_buff == NULL)
    {
        // Your ErrorRoutine - calloc() failed
        return -1;
    }

    if (is_post) // data is separate
    {
        req_len = snprintf(in_buff, buff_size,
                           "%s %s HTTP/1.0\r\nUser-Agent: %s\r\nHost: %s\r\nContent-Length: %lu\r\n\r\n%s\r\n",
                           method,
                           action,
                           clientver,
                           httpdomain,
                           (unsigned long) strlen(body),
                           body);
    }
    else // assume "get"
    {   // data may be in action field eg "/page.html?a=Hello&b=world"
        req_len = snprintf(in_buff, buff_size,
                           "%s %s HTTP/1.0\r\nUser-Agent: %s\r\nHost: %s\r\n\r\n",
                           method,
                           action,
                           clientver,
                           httpdomain);
    }
    if (req_len < 0 || (size_t) req_len >= buff_size)
    {
        // Your ErrorRoutine - request did not fit / formatting failed
        goto QUITHTTP;
    }

    // ---- connect -----------------------------------------------------
    so = socket(AF_INET, SOCK_STREAM, 0); // create a socket
    if (so == INVALID_SOCKET)
    {
        // Your ErrorRoutine - socket() failed
        goto QUITHTTP;
    }

    ling.l_onoff = 1;
    ling.l_linger = MAX_SECS_TO_LINGER;
    setsockopt(so, SOL_SOCKET, SO_LINGER, (const char *) &ling, sizeof(ling));
    setsockopt(so, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one));
    setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));
    setsockopt(so, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one));

    if (connect(so, (struct sockaddr *) &sa, sizeof(sa)) != 0) // connect socket 'so' to peer 'sa'
    {
        // Your ErrorRoutine - connect() failed
        goto QUITHTTP;
    }

    // ---- send the request --------------------------------------------
    if (mysend(so, in_buff, req_len, 0, 30) != req_len)
    {
        // Your ErrorRoutine - send failed or timed out
        goto QUITHTTP;
    }
    iret = 0;

    // ---- collect the reply -------------------------------------------
    if (resp)
    {
        int total = 0; // bytes stored in resp so far

        // Count the bytes actually received rather than using strlen(),
        // so a NUL byte in the reply cannot move the write position back.
        // Stop when the buffer is full, the peer closes / errors
        // (recv() <= 0), or nothing arrives for 10 seconds.
        while (total < max_len - 2 && so_dataready(so, 10, 0) > 0)
        {
            int got = recv(so, resp + total, max_len - 2 - total, 0);
            if (got <= 0)
                break;
            total += got;
        }
    }

QUITHTTP:
    free(in_buff);
    if (so != INVALID_SOCKET)
        closesocket(so);
    return iret;
}
What you do with the page data is up to you but I suggest saving the data in a file then calling Notepad.exe so you can have a look. You'll see some interesting headers before the actual page. Have fun.
An example of calling notepad would be:
// Example: fetch a page into a 64000 byte buffer, save it to a temporary
// file and open that file in Notepad.
char *resp;
FILE *f;
resp = calloc(1, 64000);
if (resp != NULL)
{
    // only save the reply if the request actually went out (0 == ok)
    if (httpcommand("www.google.co.uk","GET","/",NULL, resp, 64000) == 0)
    {
        // NOTE(review): my_fopen is not shown here - presumably it takes
        // the same arguments as fopen(); confirm
        f = my_fopen("c:\\oi.txt", "wb"); // tmp file
        if (f)
        {
            fwrite( resp, strlen(resp), 1, f); // write the response to file 'f'
            fclose(f);
            WinExec("notepad.exe c:\\oi.txt", SW_SHOW); // execute notepad
        } else
            ; // file create error
    } else
        ; // request failed - nothing worth saving
    // do what you will but don't forget
    free(resp);
} else
    ; // error and quit routine
|