Conversions when inherited and a constructor

This is a problem I found when trying to satisfy a simple desire: to have a small class that is best moved around by value rather than by reference. The idea was to have the base functions use one type, and a conversion operator to allow the other type to also be passed as their argument.

Yes this is abusing what templates are for, but the code did not need duplication. It also got this complex because C++ does not allow structure casts/conversion operators.

#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <stdlib.h>
#include <stdint.h>
#include <memory.h>

// forward declaration.
class utf16;

/// Pointer-sized view of narrow text. It only stores the pointer it is given;
/// nothing here allocates or frees the bytes it points at.
struct utf8Span
{
	utf8Span() = default; // ZII

	/// Read the stored pointer as a C string. Const-qualified so that a
	/// const span can be converted as well as a mutable one.
	operator const char*() const { return (const char*)m_span; }

	/// Wrap an existing byte pointer (intentionally implicit).
	utf8Span(uint8_t* text) : m_span(text) {}

	uint8_t*	m_span{nullptr};
};

/// Pointer-sized view of wide text. It only stores the pointer it is given;
/// nothing here allocates or frees the bytes it points at.
struct utf16Span
{
	utf16Span() = default; // ZII

	/// Read the stored pointer as a wide C string. Const-qualified so that a
	/// const span can be converted as well as a mutable one.
	operator const wchar_t*() const { return (const wchar_t*)m_span; }

	/// Wrap an existing byte pointer (intentionally implicit).
	utf16Span(uint8_t* text) : m_span(text) {}

	uint8_t*	m_span{nullptr};
};

/**
 * Narrow (char) string whose only data member is a single pointer.
 *
 * The buffer comes from malloc/calloc and is released with free() in the
 * destructor. The anonymous union lets that one pointer be read either as a
 * raw byte pointer (m_text) or as one of the two span types.
 */
class utf8
{
public:
	/// Empty object: no buffer is held.
	utf8() { m_text = nullptr; } // ZII

	/// Borrow the text as a C string; null when no buffer is held.
	operator const char*() { return (const char*)m_text; }
	/// Read the stored pointer through the utf8Span member of the union.
	operator const utf8Span() { return m_utf8Span; }

	/// Construct from a utf16Span; declared here, defined outside the class body.
	utf8(utf16Span text16Span);

	/// Take a private copy of a NUL-terminated string. A null argument gives
	/// an empty object instead of being handed to strlen().
	utf8(const char* text) { m_text = duplicate(text); }

	/// Replace this object's text with a copy of that's text.
	utf8& operator=(const utf8& that)
	{
		if (this != &that)
		{
			// Copy first, release second, so self-aliasing buffers stay valid
			// while they are being read.
			uint8_t* copy = duplicate((const char*)that.m_text);
			free(m_text);
			m_text = copy;
		}
		return *this;
	}

	/// Deep copy; there is no previous buffer to release in a constructor.
	utf8(const utf8& that) { m_text = duplicate((const char*)that.m_text); }

	/// Release the buffer. free(nullptr) is a no-op, so no test is needed.
	~utf8() { free(m_text); }

protected:
	union
	{
	utf8Span	m_utf8Span{0};
	uint8_t*	m_text;
	utf16Span	m_utf16Span;
	};

private:
	/// malloc'ed copy of text including its terminator; null if text is null
	/// or the allocation fails.
	static uint8_t* duplicate(const char* text)
	{
		if (text == nullptr) return nullptr;
		const size_t size = strlen(text) + 1;
		uint8_t* copy = (uint8_t*)malloc(size);
		if (copy != nullptr) memcpy(copy, text, size);
		return copy;
	}
};

class utf16 : protected utf8
{
public:
	operator const wchar_t*() { return (const wchar_t*)m_text; };
	operator const utf16Span() { return m_utf16Span; };
	utf16(utf8Span text8Span)
	{
		uint8_t* bytes = (uint8_t*)(const char*)text8Span;
		size_t size = 2 * (strlen((const char*)text8Span) + 1);
		m_text = (uint8_t*)calloc(size, 1);
		for (size_t i = 0; i < size; i += 2) m_text[i] = bytes[i/2];
	}
	utf16(const wchar_t* text16)
	{
		size_t size = 2* (wcslen(text16) + 1);
		m_text = (uint8_t*)malloc(size);
		memcpy(m_text, text16, size);
	}
	utf16& operator=(const utf16& that16)
	{
		if (m_text == nullptr) free(m_text);
		size_t size = 2 * (wcslen((wchar_t*)that16.m_text) + 1);
		m_text = (uint8_t*)malloc(size);
		memcpy(m_text, that16.m_text, size);
	}
	utf16(const utf16& that16)
	{
		if (m_text == nullptr) free(m_text);
		size_t size = 2 * (wcslen((wchar_t*)that16.m_text) + 1);
		m_text = (uint8_t*)malloc(size);
		memcpy(m_text, that16.m_text, size);
	}	~utf16() { if (m_text == nullptr) free(m_text); }
};

/// Narrow wide text by keeping the low 8 bits of every wchar_t.
/// A null span, or a failed allocation, leaves the object empty.
utf8::utf8(utf16Span text16Span)
{
	m_text = nullptr;

	const wchar_t* wide = text16Span;
	if (wide == nullptr) return; // wcslen() must not be given null

	const size_t size = wcslen(wide) + 1; // characters plus terminator
	uint8_t* narrow = (uint8_t*)calloc(size, 1);
	if (narrow == nullptr) return;

	// Reading whole wchar_t values (not every second byte) gives the same
	// result without depending on wchar_t's width or byte order.
	for (size_t i = 0; i < size; i++) narrow[i] = (uint8_t)wide[i];
	m_text = narrow;
}


// Exercises each way of building a utf8/utf16 object: from a literal, by copy,
// from a temporary, from a span of the other width, and from an object of the
// other class.
int main()
{
	// Direct-initialisation from a literal, copy-initialisation from an
	// lvalue, and initialisation from a temporary.
	utf8 aaWord8("This");
	utf8 aaNext8 = aaWord8;
	utf8 aaAgain8 = utf8("Again");
	utf16 aaWord16(L"This");
	utf16 aaNext16 = aaWord16;
	utf16 aaAgain16 = utf16(L"Again");
	
	// Spans that start one character into the buffers of the objects above.
	utf8Span aaWord8Span((uint8_t*)((const char *)aaWord8 + 1));
	utf16Span aaWord16Span((uint8_t*)((const wchar_t *)aaWord16 + 1));
	// Each class constructed from a span of the other width.
	utf16 aaMade16(aaWord8Span);
	utf8 aaMade8(aaWord16Span);
	
	// utf16 has no constructor taking a utf8; this presumably goes through
	// utf8's conversion to utf8Span to reach utf16(utf8Span).
	utf16 aaMade16a(aaWord8);
	// NOTE(review): utf16 derives from utf8 with protected inheritance, so
	// overload resolution may pick utf8(const utf8&) here and then reject it
	// as an inaccessible base conversion. The commented-out cast would force
	// the utf8(utf16Span) path instead — confirm on the target compiler.
	utf8 aaMade8a(/*(utf16Span)*/aaWord16);
    return 0;
}

Multi Dimensional Dynamically Allocated Arrays

There is a nice trick to get a dynamically backed contiguous block of memory to index like a static array. This is easy to find on the internet; however, even though the C++ reference describes how to do a 3D version, I could not find any example code.

From the cppreference page on operator overloading (https://en.cppreference.com/w/cpp/language/operators):

“operator[] can only take one subscript. In order to provide multidimensional array access semantics, e.g. to implement a 3D array access a[i][j][k] = x;, operator[] has to return a reference to a 2D plane, which has to have its own operator[] which returns a reference to a 1D row, which has to have operator[] which returns a reference to the element. To avoid this complexity, some libraries opt for overloading operator() instead, so that 3D access expressions have the Fortran-like syntax a(i, j, k) = x;”

To be clear, this is what we are trying to obtain:

    array a;
    a.setDimensions(3, 4, 5);
    a[2][0][4] = 42;
    // ... vs ...
    a(2, 0, 4) = 42;

So here is how the C-style syntax can be done for 3D.

First, the version that explains in detail what is going on. Here you can see the plane object described in the quote above. It is returned by value rather than by reference because it is small enough.

/**
 * 3D view over a fixed 24-int buffer, indexed as a[i][j][k].
 *
 * operator[] on the array returns a `plane` (remembers i), operator[] on the
 * plane returns a `row` (remembers i and j), and operator[] on the row does
 * the whole offset calculation and returns a reference to the element.
 * Both helpers are small and are returned by value.
 */
struct array
{
	/// Last indexing step: knows i and j, resolves k to an element.
	struct row
	{
		// Initialisers are listed in member declaration order.
		row(array* owner, int i, int j)
			: _i{i}, _j{j}, _array{owner} {}
		int &operator[](int k)
		{
			assert(k >= 0 && k < _array->_kSize);
			// Row-major offset: i selects a j*k plane, j a row of k elements.
			const int offset =
				(_i * _array->_kSize * _array->_jSize) +
				(_j * _array->_kSize) +
				(k);
			return _array->_data[offset];
		}
		int		_i;
		int		_j;
		array*	_array;
	};
	/// Middle indexing step: knows i, produces the row for a given j.
	struct plane
	{
		plane(array* owner, int i)
			: _i{i}, _array{owner} {}
		row operator[](int j)
		{
			assert(j >= 0 && j < _array->_jSize);
			return row(_array, _i, j);
		}
		int	_i;
		array*	_array;
	};
	/// First indexing step: produces the plane for a given i.
	plane operator[](int i)
	{
		assert(i >= 0 && i < this->_iSize);
		return plane(this, i);
	}
	/// Set the three extents. They must be positive and their product must
	/// fit in _data, otherwise valid-looking indices would run off the buffer.
	void setDimensions(int x, int y, int z)
	{
		assert(x > 0 && y > 0 && z > 0);
		assert((long long)x * y * z <= (long long)(sizeof(_data) / sizeof(_data[0])));
		_iSize = x;
		_jSize = y;
		_kSize = z;
	}
	int		_data[24];
	int		_iSize{1};
	int		_jSize{1};
	int		_kSize{1};
};

This is a faster version that uses the typical approach where the returned value is a pointer to the objects so that the last [ ] addressing is done ‘for free’.

/**
 * 3D view over a fixed 24-int buffer, indexed as a[i][j][k].
 *
 * a[i] yields a `plane` holding a pointer to the start of plane i; plane[j]
 * yields a plain int* to the start of row j, so the final [k] is ordinary
 * pointer indexing. No index or extent is checked anywhere in this struct.
 */
struct arrayFast
{
	/// Pointer to the first element of one i-plane plus the row length.
	struct plane
	{
		plane(int* data, int kSize)
			: _data{data}, _kSize{kSize} {}
		/// Address of the first element of row j within this plane.
		inline int* operator[](int j)
		{
			int* const rowStart = &_data[j * _kSize];
			return rowStart;
		}
		int*	_data;
		int	_kSize;
	};
	/// Plane i starts i * (rows per plane * elements per row) ints in.
	inline plane operator[](int i)
	{
		return plane(&_data[i * _jSize * _kSize], _kSize);
	}
	/// Store the three extents as given.
	void setDimensions(int x, int y, int z)
	{
		_kSize = z;
		_jSize = y;
		_iSize = x;
	}
	int		_data[24];
	int		_kSize{1};
	int		_jSize{1};
	int		_iSize{1};
};

Finally the recommended solution of using operator() instead.

/**
 * 3D view over a fixed 24-int buffer, indexed as a(i, j, k).
 * The whole offset calculation lives in operator(); nothing is range-checked.
 */
struct arrayFaster
{
	/// Reference to element (i, j, k) in row-major order.
	inline int& operator()(int i, int j, int k)
	{
		const int planeOffset = i * _kSize * _jSize; // skip i whole planes
		const int rowOffset = j * _kSize;            // skip j rows in that plane
		return _data[planeOffset + rowOffset + k];
	}
	/// Store the three extents as given.
	void setDimensions(int x, int y, int z)
	{
		_iSize = x;
		_jSize = y;
		_kSize = z;
	}
	int		_data[24];
	int		_iSize{1};
	int		_jSize{1};
	int		_kSize{1};
};

This last one looks much easier to understand and you would expect it to be the fastest solution as well. The code is direct and the maths in one place.

I did a quick check on Godbolt (https://godbolt.org/z/EP36qooaE) and it looks like the 2nd and last options are very similar in instructions. I did not speed test these, but it is interesting enough that for now I will be using the traditional C syntax solution.

Using assert calls to check coverage

I heard a great development process idea from Tom Forsyth where he explained that he litters his code with assert(true) to make sure that he hits all the key code paths during early testing.

This got me thinking about asserts and code coverage.

Code coverage is an evil that kills the soul of testers. It is treated as an absolute and they are forced to try to hit 100%. A much more efficient approach is to have humans spend real thinking power on where and what needs to have code coverage checked. Sure, this is not black box testing, but a clever mix of black box, API, edge case, and coverage testing is a much better approach.

Continue reading “Using assert calls to check coverage”

Compile time functions

While C++ lags behind new languages for support at compile time, there is some ability to get some of the advantages.

A simple case is to write a runtime function that is simple enough to also be run at compile time. This has the nice feature that if the function cannot be used at compile time, it will still work at runtime. Thus this can be done for many functions:

// NOTE(review): `int` is a keyword, so it cannot be the parameter's name as
// written; presumably an identifier was intended here — confirm against the
// full post.
static constexpr u32 function(const char* const int)

However, this flexibility also has the problem that you can't be sure it will be run at compile time.

Continue reading “Compile time functions”

Never Nester, Goto, RAII, and Defer

Coding against an OS or library that requires a series of calls that build on each other to get the final result requires a set of checks to make sure each stage is successful. If some of the stages also require a partner call to free/close/destroy the items, then the programming logic requires you to be careful in how you tear down the items if there is an error in the middle of the process.

This can be coded several ways, and this post discusses a series of approaches in C++. The code selected for the example performs Windows OS calls. These are done at the lowest API level without any other library or framework. The code has three places where tear down is required. The flow is complex enough to stop a simple reorganisation of the code to perform the same task. There is no additional error management present, to make it easier to read. It is a fully working program and it gets a list of USB devices that the program is allowed access to.

Continue reading “Never Nester, Goto, RAII, and Defer”

Reasons for Using C++ 17

Using the logic that we should use tools that are as simple as possible and only adopt complexity where it is absolutely necessary, it feels like I could use a very early version of C++. What follows are the reasons why I have increased that to C++17. The idea will be to only use these features and no more. Any new use will be added to this page.

// Stop the build unless either _MSVC_LANG or __cplusplus reports 201703L or later.
#if !(defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) && !(__cplusplus >= 201703L)
    #error Requires C++17 or higher
#endif
Continue reading “Reasons for Using C++ 17”

Screen Find – Seeing the Wood in the Trees

I hope you have a nice big screen – or several of them. Nice huh?

Well, all this real estate comes with a price. Sometimes the screens can hold so much information that it is hard to find what you are looking for. Would it not be nice to be able to search for the word you are looking for in a huge list of dense text?

Sure almost every app has search, but can you search the dialogue boxes, the drop down menus, the image of a poster?

So why not have a system wide special key combo that triggered a find dialogue box. Type in ‘comment’ and the screens are captured, OCR’ed and then if the text is found an overlay highlight shows you where the word is.

Testing

C++ Coding Standards

The following is not intended to generate a holy war, but merely a place for me to remind myself of the decisions I took along the way to get to a consistent style for any new code that I write.

#pragma warning( push, 3 )
#include <stdio.h>
#pragma warning( pop )

// /Wall Used. Compiler warnings turned off.
#pragma warning (disable :  5045) // Spectre code insertion warning.

// Sample class for the naming/layout conventions: leading-underscore members
// and a fixed 8-byte name buffer.
// NOTE(review): every member is private as written (no access specifier) —
// confirm whether a `public:` was lost from the excerpt.
class entity
{
    static constexpr size_t     _maxEntities = 1000;

    // Copies at most sizeof(_name) - 1 characters of name and zero-fills the
    // rest, so _name is always terminated. An array cannot be assigned from a
    // pointer, so the characters are copied one by one. A null name gives an
    // empty string.
    entity(char* name)
    {
        size_t i = 0;
        if (name != nullptr)
        {
            for (; i + 1 < sizeof(_name) && name[i] != '\0'; ++i) _name[i] = name[i];
        }
        for (; i < sizeof(_name); ++i) _name[i] = '\0';
    }

    // Declared only; no body is visible in this excerpt.
    void set();

    char    _name[8];
};

// floorf() wrapper. Any input that compares equal to zero (negative zero
// included) yields the literal 0.0f; everything else is passed to floorf().
inline float _floor(const float& a)
{
    return (a != 0.0f) ? floorf(a) : 0.0f;
}
Continue reading “C++ Coding Standards”

C Strings in C++ and Deep Copies

I was told on Stack Overflow:

“A pointer is just a pointer.”

And that is true of course. To go beyond that we have structures and classes which have explicit construction and operators. However, C++ has inherited C strings which are just a pointer to the first char, but they also have a long history where the construction and operations are well known. So, when a C String is given to a map or vector, it may be thought that it could be doing a deep copy. After all, it is known (by implicit rules) how this could be completed. The size is defined. It is just not stored. Also, the storage is clear – just not managed.

Continue reading “C Strings in C++ and Deep Copies”