Gary Baker is a C and assembler programmer with a vast amount of experience on a myriad of Windows and embedded systems. If you have any bespoke projects or contracts you wish to discuss please email g_b001@yahoo.co.uk

Windows C Programming projects

Socket basics: a routine to go get a web page

This routine will grab a web page or an ASCII file using either a host name (the domain part of a URL) or an IP address. It supports both POST and GET. If you have questions about sockets, includes, libraries or CGI POST and GET data then email me and I'll address them with another page.

It will need a few small support functions.

The first is a quick function to see if a string looks more like an IP address than a URL. Not a full IP validation but good enough for our purposes.


/* **************
	int isip()

	Quick test for "looks like a dotted IPv4 address rather than a
	host name": every character is a decimal digit or a '.', and there
	are exactly 3 dots.

	This is not a full validation (octet ranges and empty octets are
	not checked) - the caller is expected to let the later address
	conversion / connect() reject anything else that is wrong.

	returns 1 - looks like an IP address
		0 - does not

*************** */

int isip(char *p)	// p = "127.4.5.6" etc
{
	int ndots = 0;

	for (; *p != '\0'; p++)
	{
		if (*p == '.')
			ndots++;
		else
		// <ctype.h> functions require a value representable as
		// unsigned char (or EOF); a plain char may be negative.
		if (!isdigit((unsigned char) *p))
			return 0;
	}
	return (ndots == 3);
}

	

This next routine checks to see if we have any data waiting for us to read from our socket.


/* *************************
	int so_dataready()

	Wait up to sec seconds + usec microseconds for the socket to
	become readable, using select().

	return > 0 - data is ready
	       <= 0 - no data (timed out) or error

************************* */

int so_dataready(SOCKET so, 		// socket
		unsigned int sec, 	// seconds to wait
		unsigned int usec)	// microseconds (no way is it this accurate)
{
	fd_set readfds;
	struct timeval t;

	t.tv_sec = (long) sec;
	t.tv_usec = (long) usec;

	// Build the set with the standard macros rather than writing to
	// fd_count / fd_array directly: those members are a Winsock
	// implementation detail, and mysend() already uses the macros.
	FD_ZERO(&readfds);
	FD_SET(so, &readfds);

	// First argument (nfds) is left at 0 as in the original; Winsock
	// ignores it.
	return select(0, &readfds, NULL, NULL, &t);
}
	

This next function sends the data. It's a little more tolerant than the general send() routine as it allows for partial sends.



/* **************************
	int mysend()

	A send() wrapper that copes with partial sends: it keeps calling
	send() on the unsent remainder until everything has gone out.
	Before each send() it waits (up to TimeOut seconds) for the socket
	to become writable.

	returns number of bytes sent (== len) on success
		SOCKET_ERROR if the wait times out or fails, or send() fails

**************************** */

int mysend(SOCKET s, 		// the socket
	const char FAR * buf,	// the data to send
	int len, 		// length of data
	int flags, 		// flags - set to 0 - will discuss another time
	int TimeOut		// in seconds - 30 seems okay
	)
{
	int done;

	for (done = 0; done < len; )
	{
		fd_set writable;
		struct timeval wait;
		int n;

		// select() may modify both the set and the timeout,
		// so they are rebuilt on every pass
		FD_ZERO(&writable);
		FD_SET(s, &writable);
		wait.tv_sec = TimeOut;
		wait.tv_usec = 0;

		// 0 = timed out, < 0 = error: either way give up
		if (select(1, NULL, &writable, NULL, &wait) <= 0)
			return SOCKET_ERROR;

		n = send(s, buf + done, len - done, flags);
		if (n == SOCKET_ERROR)
			return SOCKET_ERROR;

		done += n;
	}
	return done;
}
	

Here's the main routine.


/* ***************
	int httpcommand()

	A simple socket routine to go
	get a web page or any other ascii web file (javascript etc)
	as one item

	assumes port 80

*************** */


int httpcommand(char *httpdomain, // "www.google.co.uk" or "192.168.2.1" etc
		char *method, 	// "post" or "get"
		char *action, 	// "/index.html" or whatever comes after the url
		char *data,	// For "post", NULL if "get". This is the 'field' data. eg "a=hello&b=world"
		char *resp,	// returned page goes here - notice it's one big blob! ie malloc(64000)
		int max_len,	// size of the buffer resp points to
		)
{
	char *clientver;
	char *in_buff;
	SOCKET so;
	struct sockaddr_in sa;
	struct hostent FAR *ph;
	int iret = 0, i;
	struct linger ling;
    	int one = 1;
	char tmp[256];

			// You can pretend to be a browser here - see what different it makes
	clientver =  "Yadda yadda";

	in_buff = calloc(1, 1024);	// general purpose input buffer
	if (in_buff == NULL)
	{
		// Your ErrorRoutine - calloc() failed
		return -1;
	}

	if (isip(httpdomain))// if httpdomain is a valid ip address format (3 dots)
	{
		sa.sin_addr.s_addr = inet_addr(httpdomain);// convert it to a unsigned long value
	} else
	{
		ph = gethostbyname(httpdomain);// find the name and address associated with the ip name
		if (ph == NULL)
		{
			free(in_buff);
			// Your ErrorRoutine - "gethostbyname(%s) failed",httpdomain
			return -1;
		}
		sa.sin_addr.s_addr = *((unsigned long *) ph->h_addr);
	}
    
	so = socket(AF_INET,SOCK_STREAM,0);	// create a socket 
	if (so == INVALID_SOCKET)
	{
		free(in_buff);
		// Your ErrorRoutine - socket() failed
		return -1;
	}

	sa.sin_family = AF_INET;	// c++ v4.0 says this must be 'PF_INET'
   	sa.sin_port = htons(80); 	// the default http port

	ling.l_onoff = 1;
	ling.l_linger = MAX_SECS_TO_LINGER;

	setsockopt(so, SOL_SOCKET, SO_LINGER, (const char *) &ling, sizeof(ling));
	setsockopt(so, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(int));
	setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(int));
	setsockopt(so, IPPROTO_TCP, TCP_NODELAY, (char *) &one,  sizeof(int));

	if (connect(so, (struct sockaddr *) &sa, sizeof(sa)))	// connect socket 'so' to peer 'sa'	
	{
		iret = -1;
		// Your ErrorRoutine - connect() failed");
		goto QUITHTTP;
	}

	if (stricmp(method, "post") == 0)	// data is seperate
	{
		sprintf(in_buff, 
			"%s %s HTTP/1.0\r\nUser-Agent: %s\r\nHost: %s\r\nContent-Length: %d\r\n\r\n%s\r\n",
			method,
			action,
			clientver,
			httpdomain,
			strlen(data),
			data);
	} else	// assume "get"				
	{					// data may be in action field eg "\page.html?a=Hello&b=world"

		sprintf(in_buff, 
			"%s %s HTTP/1.0\r\nUser-Agent: %s\r\nHost: %s\r\n\r\n",
			method,
			action,
			clientver,
			httpdomain);
	}

	mysend(so, in_buff, strlen(in_buff), 0, 30);	// send in_buff to socket

	if (resp)	
	{
		memset(resp, 0,max_len);		// set the resp to 0's
		i = 0;
		while (so_dataready(so, 10, 0) > 0 && i < (max_len-2))
		{
			i = recv(so, resp + i, max_len - 2 - i, 0);
			if (i <= 0)
				break;
			i = strlen(resp);
		}
	}

QUITHTTP:
	free(in_buff);
	closesocket(so);
	return iret;
}

	

What you do with the page data is up to you but I suggest saving the data in a file then calling Notepad.exe so you can have a look. You'll see some interesting headers before the actual page. Have fun.

An example of calling notepad would be:



	// Example fragment (not a complete function): fetch a page into a
	// heap buffer, dump it to a temporary file and open that file in
	// Notepad. The lone ';' statements are placeholders for your own
	// error handling.
	char *resp;
	FILE *f;

	resp = calloc(1, 64000);	// zero-filled response buffer
	if (resp == NULL)
		; // error and quit routine - must not fall through with resp == NULL

	// return value is not checked here
	httpcommand("www.google.co.uk","GET","/",NULL, resp, 64000);

	// NOTE(review): my_fopen is not defined in this listing - presumably a
	// wrapper around fopen(); confirm or substitute fopen()
	f = my_fopen("c:\\oi.txt", "wb"); // tmp file 
	if (f)
	{
		// resp is treated as a 0 terminated string, so output stops at the first 0 byte
		fwrite( resp, strlen(resp), 1, f); // write the response to file 'f'
		fclose(f);
		WinExec("notepad.exe c:\\oi.txt", SW_SHOW); // execute notepad
	} else
		; // file create error	

	// do what you will but don't forget
	free(resp);

	



  • Copyright (c) Gary Baker 2009 some rights reserved.
  • If you redistribute Gary Baker's content, please follow these conditions:
    • You must attribute the work to Gary Baker by prominently linking back to the source pages on Brambling Books. An ideal example would be: Article or tutorial provided by Gary Baker, BramblingBooks.co.uk/Software/
    • You must not use the content for commercial purposes. However, commercial republishing requests are often granted if you email info@bramblingbooks.co.uk and ask.
    • If you alter, remix, transform, or build upon work found on Brambling Books, you may distribute the resulting work only under the same or similar conditions.