Setting up a new skeleton: re-factoring

Before we go much further with our SOCKS server, we should do a bit of cleaning up in the project: we’ll move the Server and Observer classes to their own library, so we can more easily re-use them, and we’ll copy the Application class over to our new project — the one that will become our next step towards a fully functional SOCKS server: Episode35.

Most of the dreary details are clearly visible in the diff of the main commit but a few interesting details show up when we compare the two Application classes:

To see the code click here.To hide the code click here.

--- bin/Episode28/Application.cpp	2011-09-26 19:58:52.938883900 -0400
+++ bin/Episode35/Application.cpp	2011-10-19 21:28:29.080693800 -0400
@@ -3,17 +3,51 @@
 #include <boost/lexical_cast.hpp>
 #include <iostream>
 #include <vector>
+#include <ws2ipdef.h>
+#include <WinSock2.h>
 #include "server/Server.h"
 #include "config.h"
+#include "rfc1928/types.h"
 
 using namespace std;
 using namespace boost;
 
+struct Application::FDGuard
+{
+	FDGuard(int fd)
+		: fd_(fd)
+		, dismissed_(false)
+	{ /* no-op */ }
+
+	~FDGuard()
+	{
+		if (!dismissed_)
+		{
+			closesocket(fd_);
+		}
+		else
+		{ /* dismissed */ }
+	}
+
+	void dismiss()
+	{
+		dismissed_ = true;
+	}
+
+private :
+	FDGuard(const FDGuard&);
+	FDGuard& operator=(const FDGuard&);
+
+	int fd_;
+	bool dismissed_;
+};
+
 Application::Application()
 : server_(0)
-, data_to_send_attribute_id_(Socket::alloc())
-, target_address_attribute_id_(Socket::alloc())
-, un_paired_socket_(0)
+, socket_state_attribute_id_(Socket::alloc())
+, receive_buffer_attribute_id_(Socket::alloc())
+, send_buffer_attribute_id_(Socket::alloc())
+, socks_reply_attribute_id_(Socket::alloc())
 {
 	WSADATA wsadata;
 	WSAStartup(MAKEWORD(2, 2), &wsadata);
@@ -29,8 +63,8 @@ void Application::run(const Application:
 	// for now, expect our own path in arguments[0], the IP address to 
 	// listen on in arguments[1] and the port in arguments[2]
 	assert(arguments.size() >= 1);
-	string ip(arguments.size() > 1 ? arguments[1] : CHAUSETTE_EPISODE28_DEFAULT_IP);
-	unsigned short port(arguments.size() > 2 ? boost::lexical_cast< unsigned short >(arguments[2]) : CHAUSETTE_EPISODE28_DEFAULT_PORT);
+	string ip(arguments.size() > 1 ? arguments[1] : CHAUSETTE_EPISODE35_DEFAULT_IP);
+	unsigned short port(arguments.size() > 2 ? boost::lexical_cast< unsigned short >(arguments[2]) : CHAUSETTE_EPISODE35_DEFAULT_PORT);
 	sockaddr_storage address;
 	memset(&address, 0, sizeof(address));
 	sockaddr_in &in_address = reinterpret_cast< sockaddr_in& >(address);
@@ -62,128 +96,405 @@ void Application::run(const Application:
 /*virtual */void Application::onNewConnection(Socket &socket)
 {
 	Socket &new_socket(server_->accept(socket));
-	remote_address_to_socket_.insert(RemoteAddressToSocket::value_type(new_socket.remote_address_, &new_socket));
-	pairSocket(new_socket);
+	new_socket.get(socket_state_attribute_id_) = expect_authentication_method_request__;
 }
 
 /*virtual */void Application::onDataReady(Socket &socket)
 {
-	vector< char > temp; // in case the socket is un-paired
-	bool needed_to_initialize(false);
-	vector< char >::size_type offset(0);
-	Socket *partner((&socket == un_paired_socket_) ? 0 : remote_address_to_socket_[any_cast< sockaddr_storage >(socket.get(target_address_attribute_id_))]);
-	if (partner &&
-		partner->get(data_to_send_attribute_id_).empty())
+	if (socket.get(receive_buffer_attribute_id_).empty())
 	{
-		partner->get(data_to_send_attribute_id_) = vector< char >(1024);
-		needed_to_initialize = true;
+		socket.get(receive_buffer_attribute_id_) = Buffer(default_buffer_size__);
 	}
-	else if (partner)
-	{ /* already have a buffer */ }
 	else
-	{
-		temp.resize(1024);
-		needed_to_initialize = true;
-	}
-	vector< char > &buffer = partner ? any_cast< vector< char >& >(partner->get(data_to_send_attribute_id_)) : temp;
-	if (!needed_to_initialize && buffer.empty())
+	{ /* already have a receive buffer */ }
+	Buffer &buffer(any_cast< Buffer& >(socket.get(receive_buffer_attribute_id_)));
+	Buffer::size_type offset(0);
+	if (buffer.empty())
 	{
 		buffer.resize(buffer.capacity());
 	}
-	else if (!needed_to_initialize)
+	else
 	{
 		offset = buffer.size();
-		if (buffer.capacity() <= offset + 1024)
+		if (buffer.capacity() - offset < minimal_available_buffer_size__)
 		{
-			buffer.resize(offset + 1024);
+			buffer.resize(offset + minimal_available_buffer_size__);
 		}
 		else
 		{
 			buffer.resize(buffer.capacity());
 		}
 	}
+	Buffer::size_type avail(buffer.size() - offset);
+	Buffer::pointer recv_ptr(&(buffer[offset]));
+	server_->read(socket, recv_ptr, &avail);
+	buffer.resize(offset + avail);
+	// here, according to the state of the socket, dispatch the data
+	if (socket.get(socket_state_attribute_id_).empty())
+	{
+		socket.get(socket_state_attribute_id_) = expect_authentication_method_request__;
+	}
 	else
-	{ /* needed to initialize - so no need to account for data already in the buffer */ }
-	unsigned int data_read(buffer.size() - offset);
-	char *read_ptr(&buffer[0]);
-	read_ptr += offset;
-	try
+	{ /* the socket already has a state */ }
+	switch (any_cast< SocketState >(socket.get(socket_state_attribute_id_).empty()))
 	{
-		server_->read(socket, read_ptr, &data_read);
-		buffer.resize(offset + data_read);
-		unsigned int data_written(buffer.size());
-		if (partner)
+	case expect_authentication_method_request__ :
+		onAuthenticationMethodRequest(socket);
+		break;
+	case expect_socks_request__ :
+		onSocksRequest(socket);
+		break;
+	}
+}
+
+/*virtual */void Application::onWriteReady(Socket &socket)
 		{
-			server_->write(*partner, &buffer[0], &data_written);
+	if (!socket.get(send_buffer_attribute_id_).empty())
+	{
+		Buffer &buffer(any_cast< Buffer& >(socket.get(send_buffer_attribute_id_)));
+		Buffer::size_type offset(0);
+		if (!buffer.empty())
+		{
+			Buffer::size_type avail(buffer.size());
+			Buffer::pointer send_ptr(&(buffer[0]));
+			server_->write(socket, send_ptr, &avail);
+			buffer.erase(buffer.begin(), buffer.begin() + avail);
+		}
+		else
+		{ /* nothing to send */ }
 		}
 		else
-		{ /* no partner to send data to */ }
-		buffer.erase(buffer.begin(), buffer.begin() + data_written);
+	{ /* nothing to send */ }
 	}
-	catch (const Server::NetworkError&)
+
+/*virtual */void Application::onExceptionalDataReady(Socket &socket)
 	{
-		// ignore this for now: the socket will have been dealt with but this is no reason for us to crash.
 	}
+
+/*virtual */void Application::onCloseSocket(Socket &socket)
+{
 }
 
-/*virtual */void Application::onWriteReady(Socket &socket)
+void Application::onAuthenticationMethodRequest(Socket &socket) const
 {
-	if (socket.get(data_to_send_attribute_id_).empty())
-	{ /* no-op */ }
+	using Vlinder::Chausette::RFC1928::VersionIdentifierMethodSelectionMessage;
+	using Vlinder::Chausette::RFC1928::MethodMessage;
+	Buffer &buffer(any_cast< Buffer& >(socket.get(receive_buffer_attribute_id_)));
+	if (buffer.size() < offsetof(VersionIdentifierMethodSelectionMessage, methods_) + 1)
+	{
+		throw InsufficientData("Not enough data for a version identifier/method selection message");
+	}
 	else
+	{ /* all is well so far */ }
+	VersionIdentifierMethodSelectionMessage *message(reinterpret_cast< VersionIdentifierMethodSelectionMessage* >(&buffer[0]));
+	if (message->ver_ != CHAUSETTE_EPISODE35_SOCKS_VERSION)
 	{
-		vector< char > &buffer = any_cast< vector< char >& >(socket.get(data_to_send_attribute_id_));
-		if (buffer.empty())
-		{ /* no-op */ }
+		throw WrongSocksVersion("Wrong socks version", CHAUSETTE_EPISODE35_SOCKS_VERSION, message->ver_);
+	}
+	else
+	{ /* the version is OK */ }
+	if (buffer.size() < offsetof(VersionIdentifierMethodSelectionMessage, methods_) + message->nmethods_)
+	{
+		throw InsufficientData("Not enough data for a version identifier/method selection message");
+	}
 		else
+	{ /* all is well so far */ }
+	/* As we haven't implemented any authentication methods yet, we only 
+	 * support "no authentication" - method 0. If it is not present among 
+	 * the methods, throw an exception. */
+	bool authentication_ok(false);
+	for (unsigned char *method = message->methods_; !authentication_ok && ((method - message->methods_) < message->nmethods_); ++method)
 		{
-			unsigned int data_written(buffer.size());
-			server_->write(socket, &buffer[0], &data_written);
-			buffer.erase(buffer.begin(), buffer.begin() + data_written);
+		authentication_ok = (*method == 0);
 		}
+	if (!authentication_ok)
+	{
+		throw NoSupportedAuthenticationMethod("No supported authentication method");
 	}
+	else
+	{ /* all is well */ }
+	MethodMessage methodMessage(CHAUSETTE_EPISODE35_SOCKS_VERSION, 0/* no authentication - put a constant here later */);
+	unsigned char *ptr(reinterpret_cast< unsigned char* >(&methodMessage));
+	queueDataToSend(socket, ptr, ptr + sizeof(methodMessage));
 }
 
-/*virtual */void Application::onExceptionalDataReady(Socket &socket)
+void Application::onSocksRequest(Socket &socket)
 {
+	using Vlinder::Chausette::RFC1928::SocksRequest;
+	Buffer &buffer(any_cast< Buffer& >(socket.get(receive_buffer_attribute_id_)));
+	if (buffer.size() < offsetof(SocksRequest, dst_addr_) + 1)
+	{
+		throw InsufficientData("Not enough data for a SOCKS request");
 }
-
-/*virtual */void Application::onCloseSocket(Socket &socket)
+	else
+	{ /* all is well so far */ }
+	SocksRequest *message(reinterpret_cast< SocksRequest* >(&buffer[0]));
+	if (message->ver_ != CHAUSETTE_EPISODE35_SOCKS_VERSION)
 {
-	RemoteAddressToSocket::iterator where(remote_address_to_socket_.find(socket.remote_address_));
-	assert(where != remote_address_to_socket_.end());
-	assert(where->second == &socket);
-	remote_address_to_socket_.erase(where);
-	unpairSocket(socket);
+		throw WrongSocksVersion("Wrong SOCKS version", CHAUSETTE_EPISODE35_SOCKS_VERSION, message->ver_);
 }
-
-void Application::pairSocket(Socket &socket)
+	else
+	{ /* the version is OK */ }
+	/* Inside the message, the port field is the only one that isn't 
+	 * necessarily in the same position as in the struct. The other
+	 * fields are where they should be - and we can get to the port 
+	 * field by pasing the address type field. */
+	sockaddr_storage address;
+	memset(&address, 0, sizeof(address));
+	switch (message->atyp_)
 {
-	if (un_paired_socket_)
+	case 1 :
 	{
-		un_paired_socket_->get(target_address_attribute_id_) = socket.remote_address_;
-		socket.get(target_address_attribute_id_) = un_paired_socket_->remote_address_;
-		un_paired_socket_ = 0;
+		// IP V4 address: X'01'
+		if (buffer.size() < offsetof(SocksRequest, dst_addr_) + 6 /* four for the address, two for the port */)
+		{
+			throw InsufficientData("Not enough data for a SOCKS request");
+		}
+		else
+		{ /* all is well so far */ }
+		address.ss_family = AF_INET;
+		sockaddr_in *a4(reinterpret_cast< sockaddr_in* >(&address));
+		memcpy(&a4->sin_addr, message->dst_addr_, 4);
+		memcpy(&a4->sin_port, message->dst_addr_ + 4, 2);
+		break;
+	}
+	case 3 :
+	{
+		// DOMAINNAME: X'03'
+		unsigned char hostname_length(message->dst_addr_[0]);
+		if (buffer.size() < offsetof(SocksRequest, dst_addr_) + hostname_length + 2 /* hostname_length for the address, two for the port */)
+		{
+			throw InsufficientData("Not enough data for a SOCKS request");
 	}
 	else
+		{ /* all is well so far */ }
+		char *hostname_begin(reinterpret_cast< char* >(message->dst_addr_ + 1));
+		char *hostname_end = hostname_begin + hostname_length;
+		unsigned short port(*reinterpret_cast< unsigned short* >(hostname_end));
+		*hostname_end = 0; // cap it off
+		hostent *host_entry(gethostbyname(hostname_begin));
+		if (!host_entry)
 	{
-		un_paired_socket_ = &socket;
+			throw NameResolutionError("Name resolution error", GetLastError());
+		}
+		else
+		{ /* resolved OK */ }
+		switch (host_entry->h_addrtype)
+		{
+		case AF_INET :
+		{
+			sockaddr_in *a4(reinterpret_cast< sockaddr_in* >(&address));
+			memcpy(&a4->sin_addr, host_entry->h_addr_list[0], 4);
+			a4->sin_port = port;
+			break;
+		}
+		case AF_INET6 :
+		{
+			sockaddr_in6 *a6(reinterpret_cast< sockaddr_in6* >(&address));
+			memcpy(&a6->sin6_addr, host_entry->h_addr_list[0], 16);
+			a6->sin6_port = port;
+			break;
+		}
 	}
+		break;
 }
+	case 4 :
+	{
+		// IP V6 address: X'04'
+		if (buffer.size() < offsetof(SocksRequest, dst_addr_) + 16 + 2 /* four for the address, two for the port */)
+		{
+			throw InsufficientData("Not enough data for a SOCKS request");
+		}
+		else
+		{ /* all is well so far */ }
+		address.ss_family = AF_INET6;
+		sockaddr_in6 *a6(reinterpret_cast< sockaddr_in6* >(&address));
+		memcpy(&a6->sin6_addr, message->dst_addr_, 16);
+		memcpy(&a6->sin6_port, message->dst_addr_ + 16, 2);
+		break;
+	}
+	default :
+		throw UnknownAddressType("Unknown address type", message->atyp_);
+	}
+	switch (message->cmd_)
+	{
+	case 1 :
+		// CONNECT X'01'
+		doConnect(socket, address);
+		break;
+	case 2 :
+		// BIND X'02' //TODO
+	case 3 :
+		// UDP ASSOCIATE X'03' //TODO
+		break;
+	}
+}
+
+void Application::doConnect(Socket &parent_socket, const sockaddr_storage &address)
+{
+	/* In the reply to a CONNECT, BND.PORT contains the port number that the
+	 * server assigned to connect to the target host, while BND.ADDR
+	 * contains the associated IP address.  The supplied BND.ADDR is often
+	 * different from the IP address that the client uses to reach the SOCKS
+	 * server, since such servers are often multi-homed.  It is expected
+	 * that the SOCKS server will use DST.ADDR and DST.PORT, and the
+	 * client-side source address and port in evaluating the CONNECT
+	 * request. */
+	using Vlinder::Chausette::RFC1928::SocksReply;
+	SocksReply reply;
+	memset(&reply, 0, sizeof(reply));
+	reply.ver_ = CHAUSETTE_EPISODE35_SOCKS_VERSION;
+	/* attempt to connect to the target address. If successful, open a 
+	 * server socket for the client to connect to, and forward the data
+	 * between the two. */
+	int sock_fd(::socket(address.ss_family, SOCK_STREAM, IPPROTO_TCP));
+	if (sock_fd == INVALID_SOCKET)
+	{
+		reply.rep_ = 1; // X'01' general SOCKS server failure
+		parent_socket.get(socks_reply_attribute_id_) = reply;
+		setSocketState(parent_socket, send_socks_reply__);
+	}
+	else
+	{ /* all is well */ }
+	FDGuard fd_guard(sock_fd);
+	u_long arg(1);
+	if (ioctlsocket(sock_fd, FIONBIO, &arg) != 0)
+	{
+		reply.rep_ = 1; // X'01' general SOCKS server failure
+		parent_socket.get(socks_reply_attribute_id_) = reply;
+		setSocketState(parent_socket, send_socks_reply__);
+		throw SocketIOCTLFailed("Failed to set socket to non-blocking", WSAGetLastError());
+	}
+	else
+	{ /* all is well so far */ }
+	if (connect(sock_fd, (const sockaddr*)&address, sizeof(address)) != 0)
+	{
+		unsigned long last_error(WSAGetLastError());
+		switch (last_error)
+		{
+		// logic errors
+		case WSANOTINITIALISED :
+			/* A successful WSAStartup call must occur before using this function. */
+		case WSAEADDRINUSE :
+			/* The socket's local address is already in use and the socket was not 
+			 * marked to allow address reuse with SO_REUSEADDR. This error usually 
+			 * occurs when executing bind, but could be delayed until the connect 
+			 * function if the bind was to a wildcard address (INADDR_ANY or 
+			 * in6addr_any) for the local IP address. A specific address needs to 
+			 * be implicitly bound by the connect function. */
+		case WSAEINTR :
+			/* The blocking Windows Socket 1.1 call was canceled through 
+			 * WSACancelBlockingCall. */
+		case WSAEINPROGRESS :
+			/* A blocking Windows Sockets 1.1 call is in progress, or the service 
+			 * provider is still processing a callback function. */
+		case WSAEALREADY :
+			/* A nonblocking connect call is in progress on the specified socket.
+			 * Note  In order to preserve backward compatibility, this error is reported 
+			 * as WSAEINVAL to Windows Sockets 1.1 applications that link to either 
+			 * Winsock.dll or Wsock32.dll. */
+		case WSAEAFNOSUPPORT :
+			/* Addresses in the specified family cannot be used with this socket. */
+		case WSAEINVAL :
+			/* The parameter s is a listening socket. */
+		case WSAEISCONN :
+			/* The socket is already connected (connection-oriented sockets only). */
+		case WSAENOTSOCK :
+			/* The descriptor specified in the s parameter is not a socket. */
+		case WSAEACCES :
+			/* An attempt to connect a datagram socket to broadcast address failed 
+			 * because setsockopt option SO_BROADCAST is not enabled. */
+		default :
+			reply.rep_ = 1; // X'01' general SOCKS server failure
+			parent_socket.get(socks_reply_attribute_id_) = reply;
+			setSocketState(parent_socket, send_socks_reply__);
+			throw ConnectError("Internal error calling connect", last_error);
+
+		// run-time errors outside the caller's control
+		case WSAENETDOWN :
+			/* The network subsystem has failed. */
+			reply.rep_ = 1; // X'01' general SOCKS server failure
+			parent_socket.get(socks_reply_attribute_id_) = reply;
+			setSocketState(parent_socket, send_socks_reply__);
+			break;
+		case WSAECONNREFUSED :
+			/* The attempt to connect was forcefully rejected. */
+			reply.rep_ = 5; // X'05' Connection refused
+			parent_socket.get(socks_reply_attribute_id_) = reply;
+			setSocketState(parent_socket, send_socks_reply__);
+			break;
+		case WSAENETUNREACH :
+			/* The network cannot be reached from this host at this time. */
+			reply.rep_ = 3; // X'03' Network unreachable
+			parent_socket.get(socks_reply_attribute_id_) = reply;
+			setSocketState(parent_socket, send_socks_reply__);
+			break;
+		case WSAEHOSTUNREACH :
+			/* A socket operation was attempted to an unreachable host. */
+			reply.rep_ = 4; // X'04' Host unreachable
+			parent_socket.get(socks_reply_attribute_id_) = reply;
+			setSocketState(parent_socket, send_socks_reply__);
+			break;
+		case WSAENOBUFS :
+			/* Note  No buffer space is available. The socket cannot be connected. */
+			reply.rep_ = 1; // X'01' general SOCKS server failure
+			parent_socket.get(socks_reply_attribute_id_) = reply;
+			setSocketState(parent_socket, send_socks_reply__);
+			break;
+		case WSAETIMEDOUT :
+			/* An attempt to connect timed out without establishing a connection. */
+			reply.rep_ = 6; // X'06' TTL expired
+			parent_socket.get(socks_reply_attribute_id_) = reply;
+			setSocketState(parent_socket, send_socks_reply__);
+			break;
+
+		// run-time errors that point to bugs in the caller/client
+		case WSAEADDRNOTAVAIL :
+			/* The remote address is not a valid address (such as INADDR_ANY or in6addr_any) . */
+		case WSAEFAULT :
+			/* The sockaddr structure pointed to by the name contains incorrect address format 
+			 * for the associated address family or the namelen parameter is too small. This error 
+			 * is also returned if the sockaddr structure pointed to by the name parameter with 
+			 * a length specified in the namelen parameter is not in a valid part of the user 
+			 * address space. */
+			reply.rep_ = 8; // X'08' Address type not supported
+			parent_socket.get(socks_reply_attribute_id_) = reply;
+			setSocketState(parent_socket, send_socks_reply__);
+			break;
+
+		// "normal" errors
+		case WSAEWOULDBLOCK :
+			/* The socket is marked as nonblocking and the connection cannot be completed immediately. */
+			parent_socket.get(socks_reply_attribute_id_) = reply;
+			setSocketState(parent_socket, wait_socks_reply__);
+			break;
+		}
+	}
+	else
+	{ /* all is well */ }
+}
+
+void Application::setSocketState(Socket &socket, Application::SocketState state)
+{
+	SocketState current_state(any_cast< SocketState >(socket.get(socket_state_attribute_id_)));
+	//TODO
+}
+
 
-void Application::unpairSocket(Socket &socket)
+void Application::queueDataToSend(Socket &socket, unsigned char *begin, unsigned char *end) const
 {
-	// find the socket this one was paired to
-	if (un_paired_socket_ == &socket)
+	Buffer *buffer(0);
+	if (!socket.get(send_buffer_attribute_id_).empty())
 	{
-		un_paired_socket_ = 0;
+		buffer = &(any_cast< Buffer& >(socket.get(send_buffer_attribute_id_)));
 	}
 	else
 	{
-		assert(!socket.get(target_address_attribute_id_).empty());
-		sockaddr_storage target_address(any_cast< sockaddr_storage >(socket.get(target_address_attribute_id_)));
-		Socket *other_socket(remote_address_to_socket_[target_address]);
		other_socket->get(target_address_attribute_id_) = any();
-		pairSocket(*other_socket);
+		socket.get(send_buffer_attribute_id_) = Buffer();
+		buffer = &(any_cast< Buffer& >(socket.get(send_buffer_attribute_id_)));
 	}
+	copy(begin, end, back_inserter(*buffer));
+	unsigned int data(buffer->size());
+	server_->write(socket, &((*buffer)[0]), &data);
+	buffer->erase(buffer->begin(), buffer->begin() + data);
 }

We are going to have to manage different kinds of sockets: sockets for command-and-control (which we will call “control sockets” from here on) and sockets that need to be proxied. At first, we’ll just do TCP proxying but, eventually, we will also proxy UDP.

The way this is going to work, we will have only one thread to do most of the work — so we’ll have to be relatively smart about multiplexing our work. In this context, multiplexing means that we tell the sockets API — and therefore the underlying TCP/IP stack — what we want it to do, but we don’t wait around for it to perform its tasks: rather, we tell it to notify us whenever a task is finished, and we carry on from there. For that to work we do, of course, need to know what task it was performing. We do that by associating a state with each socket, which we store in one of the socket’s attributes; the ID of that attribute is kept in socket_state_attribute_id_. That attribute is allocated in the constructor and used throughout the code. For example, in onDataReady, it is used to know what to do with the incoming data:

To see the code click here.To hide the code click here.

132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
// Dispatch the received data according to the state stored on the socket.
// A socket that has no state yet starts out expecting an authentication
// method request.
if (socket.get(socket_state_attribute_id_).empty())
{
	socket.get(socket_state_attribute_id_) = expect_authentication_method_request__;
}
else
{ /* the socket already has a state */ }
// Cast the stored attribute itself. Casting the result of empty() would wrap
// a bool in a temporary any, and any_cast< SocketState > on that always
// throws bad_any_cast.
switch (any_cast< SocketState >(socket.get(socket_state_attribute_id_)))
{
case expect_authentication_method_request__ :
	onAuthenticationMethodRequest(socket);
	break;
case expect_socks_request__ :
	onSocksRequest(socket);
	break;
}

This basically turns the socket itself into a state machine. The available states would, of course, be different according to the role of the socket (i.e. a data socket would never be expected to send a SOCKS request).

We will look into state machines in the next installment.

Another important/interesting part of the new code is the parsing of SOCKS requests. We will look at the actions they imply later, but we will take a closer look at the parsing now:

The first step is to read the data from the buffer. If there isn’t enough data in the buffer, the request cannot be parsed and should be set aside. Some preliminary checks can be performed on the request immediately, as the following snippet of code will show:

To see the code click here.To hide the code click here.

181
182
183
184
185
186
187
188
189
190
191
void Application::onAuthenticationMethodRequest(Socket &socket) const
{
	using Vlinder::Chausette::RFC1928::VersionIdentifierMethodSelectionMessage;
	using Vlinder::Chausette::RFC1928::MethodMessage;
	Buffer &buffer(any_cast< Buffer& >(socket.get(receive_buffer_attribute_id_)));
	if (buffer.size() < offsetof(VersionIdentifierMethodSelectionMessage, methods_) + 1)
	{
		throw InsufficientData("Not enough data for a version identifier/method selection message");
	}
	else
	{ /* all is well so far */ }

Now that we know we at least have enough data, we can treat the data as a message and, assuming the data in the buffer is either properly aligned, or we don’t care about the alignment, we can simply cast the buffer to the appropriate message type and do some more preliminary checks:

To see the code click here.To hide the code click here.

192
193
194
195
196
197
198
199
200
201
202
203
204
205
	VersionIdentifierMethodSelectionMessage *message(reinterpret_cast< VersionIdentifierMethodSelectionMessage* >(&buffer[0]));
	if (message->ver_ != CHAUSETTE_EPISODE35_SOCKS_VERSION)
	{
		throw WrongSocksVersion("Wrong socks version", CHAUSETTE_EPISODE35_SOCKS_VERSION, message->ver_);
	}
	else
	{ /* the version is OK */ }
	unsigned int message_size(offsetof(VersionIdentifierMethodSelectionMessage, methods_) + message->nmethods_);
	if (buffer.size() < message_size)
	{
		throw InsufficientData("Not enough data for a version identifier/method selection message");
	}
	else
	{ /* all is well so far */ }

In this specific message, we’re setting up an authentication method – we won’t support authentication right away, so for now, only method 0 is supported. According to the protocol, this means that at least one of the proposed authentication methods, of which there can be up to 255, has to be 0.

To see the code click here.To hide the code click here.

209
210
211
212
213
214
215
216
217
218
219
	bool authentication_ok(false);
	for (unsigned char *method = message->methods_; !authentication_ok && ((method - message->methods_) < message->nmethods_); ++method)
	{
		authentication_ok = (*method == 0);
	}
	if (!authentication_ok)
	{
		throw NoSupportedAuthenticationMethod("No supported authentication method");
	}
	else
	{ /* all is well */ }

Once we’ve established that we can, indeed, authenticate (or rather: that we don’t need to) we can set up our reply, and send it.

To see the code click here.To hide the code click here.

220
221
222
	MethodMessage method_message(CHAUSETTE_EPISODE35_SOCKS_VERSION, 0/* no authentication - put a constant here later */);
	unsigned char *ptr(reinterpret_cast< unsigned char* >(&method_message));
	queueDataToSend(socket, ptr, ptr + sizeof(method_message));

Then, we set the state of the socket to one in which we expect SOCKS requests — now that the initial handshaking is done — and consume the data we’ve already handled.

To see the code click here.To hide the code click here.

223
224
	socket.get(socket_state_attribute_id_) = expect_socks_request__;
	buffer.erase(buffer.begin(), buffer.begin() + message_size);

SOCKS requests are handled pretty much the same way, but contain an address — the address of the host we will be proxying to. Depending on the address type, there are different ways to extract that address. The request can either contain the actual IPv4 address, as in the next bit of code:

To see the code click here.To hide the code click here.

242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
	sockaddr_storage address;
	memset(&address, 0, sizeof(address));
	switch (message->atyp_)
	{
	case 1 :
	{
		// IP V4 address: X'01'
		if (buffer.size() < offsetof(SocksRequest, dst_addr_) + 6 /* four for the address, two for the port */)
		{
			throw InsufficientData("Not enough data for a SOCKS request");
		}
		else
		{ /* all is well so far */ }
		address.ss_family = AF_INET;
		sockaddr_in *a4(reinterpret_cast< sockaddr_in* >(&address));
		memcpy(&a4->sin_addr, message->dst_addr_, 4);
		memcpy(&a4->sin_port, message->dst_addr_ + 4, 2);
		break;
	}


or it can be a domain name, in which case the address has to be looked up.

Address lookup is done synchronously, using gethostbyname, but should eventually be done asynchronously as calling this function may take some time — time we don’t necessarily want to spend waiting.

To see the code click here.To hide the code click here.

261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
	case 3 :
	{
		// DOMAINNAME: X'03'
		// dst_addr_ holds one length octet, then the host name (not
		// NUL-terminated), then the two-octet port.
		unsigned char hostname_length(message->dst_addr_[0]);
		if (buffer.size() < offsetof(SocksRequest, dst_addr_) + 1 + hostname_length + 2 /* one for the length octet, hostname_length for the name, two for the port */)
		{
			throw InsufficientData("Not enough data for a SOCKS request");
		}
		else
		{ /* all is well so far */ }
		char *hostname_begin(reinterpret_cast< char* >(message->dst_addr_ + 1));
		char *hostname_end = hostname_begin + hostname_length;
		// Copy the port out byte-wise: it follows a variable-length name, so
		// it is not necessarily aligned for an unsigned short. The value is
		// left exactly as it came off the wire and is stored unchanged in
		// sin_port/sin6_port below.
		unsigned short port(0);
		memcpy(&port, hostname_end, sizeof(port));
		*hostname_end = 0; // cap it off - this overwrites the first port octet, which we saved above
		hostent *host_entry(gethostbyname(hostname_begin));
		if (!host_entry)
		{
			// Winsock reports name resolution failures through WSAGetLastError
			throw NameResolutionError("Name resolution error", WSAGetLastError());
		}
		else
		{ /* resolved OK */ }
		switch (host_entry->h_addrtype)
		{
		case AF_INET :
		{
			// the family must be set: doConnect creates its socket from address.ss_family
			address.ss_family = AF_INET;
			sockaddr_in *a4(reinterpret_cast< sockaddr_in* >(&address));
			memcpy(&a4->sin_addr, host_entry->h_addr_list[0], 4);
			a4->sin_port = port;
			break;
		}
		case AF_INET6 :
		{
			address.ss_family = AF_INET6;
			sockaddr_in6 *a6(reinterpret_cast< sockaddr_in6* >(&address));
			memcpy(&a6->sin6_addr, host_entry->h_addr_list[0], 16);
			a6->sin6_port = port;
			break;
		}
		}
		break;
	}

The address can also be an IPv6 address, in which case, like in the case of an IPv4 address, it is simply copied.

To see the code click here.To hide the code click here.

301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
	case 4 :
	{
		// IP V6 address: X'04'
		if (buffer.size() < offsetof(SocksRequest, dst_addr_) + 16 + 2 /* sixteen for the address, two for the port */)
		{
			throw InsufficientData("Not enough data for a SOCKS request");
		}
		else
		{ /* all is well so far */ }
		address.ss_family = AF_INET6;
		sockaddr_in6 *a6(reinterpret_cast< sockaddr_in6* >(&address));
		// the address and the port are copied byte-for-byte from the request
		memcpy(&a6->sin6_addr, message->dst_addr_, 16);
		memcpy(&a6->sin6_port, message->dst_addr_ + 16, 2);
		break;
	}
	default :
		throw UnknownAddressType("Unknown address type", message->atyp_);
	}

There are a few things you should notice: invalid IP addresses and domain names are not necessarily a problem — they won’t work, of course, but there is really no other way to check for them than to try them out. The only thing we really check is that there’s enough data in the request to contain the information being extracted from the request.

SOCKS requests are fairly simple and don’t contain anything like checksums or cryptographic authentication (although cryptographic authentication can be part of the protocol, if supported by both sides) so other than these few checks, there is really nothing to be done.

Queueing data to send looks like this:

To see the code click here.To hide the code click here.

503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
/* Append [begin, end) to the send buffer attached to the socket, creating
 * that buffer if the socket does not have one yet, then try to send as much
 * of the buffered data as possible right away. Whatever could not be sent
 * stays in the buffer, to be sent from onWriteReady. */
void Application::queueDataToSend(Socket &socket, unsigned char *begin, unsigned char *end) const
{
	if (socket.get(send_buffer_attribute_id_).empty())
	{
		socket.get(send_buffer_attribute_id_) = Buffer();
	}
	else
	{ /* the socket already has a send buffer */ }
	Buffer &buffer(any_cast< Buffer& >(socket.get(send_buffer_attribute_id_)));
	buffer.insert(buffer.end(), begin, end);
	if (buffer.empty())
	{
		// nothing queued and nothing added: taking the address of
		// buffer[0] on an empty buffer would be undefined behaviour
		return;
	}
	else
	{ /* there is something to send */ }
	// data goes in as the number of bytes available and comes back as the
	// number of bytes actually written
	unsigned int data(buffer.size());
	server_->write(socket, &(buffer[0]), &data);
	buffer.erase(buffer.begin(), buffer.begin() + data);
}

First thing we do is get a buffer to put the data into. That buffer is associated with the socket itself so that, if the socket isn’t ready for data being written to it, we can write the data to the socket when the socket is ready, in onWriteReady. The Server code will know what to do with a call to write if the socket isn’t ready, so we can simply call it and remove, from the buffer, any data that we could send.

The onWriteReady method is similar, of course:

To see the code click here.To hide the code click here.

152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
/* Called when the socket is ready to be written to: if a send buffer is
 * attached to the socket and it holds data, send as much of it as possible
 * and drop what was sent from the front of the buffer. */
/*virtual */void Application::onWriteReady(Socket &socket)
{
	if (!socket.get(send_buffer_attribute_id_).empty())
	{
		Buffer &buffer(any_cast< Buffer& >(socket.get(send_buffer_attribute_id_)));
		if (!buffer.empty())
		{
			// avail goes in as the number of bytes on offer and comes
			// back as the number of bytes actually written
			Buffer::size_type avail(buffer.size());
			Buffer::pointer send_ptr(&(buffer[0]));
			server_->write(socket, send_ptr, &avail);
			buffer.erase(buffer.begin(), buffer.begin() + avail);
		}
		else
		{ /* nothing to send */ }
	}
	else
	{ /* no send buffer attached to this socket - nothing to send */ }
}

As you can see, it simply retrieves the buffer if there is one and sends as much of the available data as possible.

In the next few installments, we will look at the following:

  1. state machines: as mentioned above, every socket will be construed as a state machine — but we will look at state machines in other contexts as well.
  2. threads: as mentioned above, some actions (such as DNS queries) will need to be performed asynchronously. We will do that by creating a thread to handle those requests.
  3. the Command pattern which we’ll be using to communicate with the thread

About rlc

Software Analyst in embedded systems and C++, C and VHDL developer, I specialize in security, communications protocols and time synchronization, and am interested in concurrency, generic meta-programming and functional programming and their practical applications. I take a pragmatic approach to project management, focusing on the management of risk and scope. I have over two decades of experience as a software professional and a background in science.
This entry was posted in C++ for the self-taught and tagged . Bookmark the permalink.