Time Complexity
With partition, the elements compared each time are as follow:
n, n/2, n/2, n/4, n/4, n/4, n/4, ..., 1, 1, ..., 1 (n ones)
And their sum is:
n, n, n, ..., n
But how much n?
Since n is divided to 1 by 2, there sure is log2(n) number of n.
So the time complexity is: n*log2(n)
Unique-elements sorting implement
From Quicksort - wikipedia:
algorithm quicksort(A, lo, hi) is
if lo < hi then
p := partition(A, lo, hi)
quicksort(A, lo, p)
quicksort(A, p + 1, hi)
algorithm partition(A, lo, hi) is
pivot := A[lo]
i := lo - 1
j := hi + 1
loop forever
do
i := i + 1
while A[i] < pivot
do
j := j - 1
while A[j] > pivot
if i >= j then
return j
swap A[i] with A[j]
My C++ implement:
#include <cstdio>
#include <algorithm>
int partition(int *arr, int lo, int hi)
{
int pivot = arr[lo];
while (true)
{
while (arr[lo] < pivot) ++lo;
while (arr[hi] > pivot) --hi;
if (lo >= hi) return lo;
std::swap(arr[lo], arr[hi]);
}
}
void quicksort(int *arr, int lo, int hi)
{
if (lo >= hi) return;
int p = partition(arr, lo, hi);
quicksort(arr, lo, p-1);
quicksort(arr, p+1, hi);
}
int main()
{
int A[] = {
15, 16, 10, 9, 4, 3, 6, 1, 7, 2, 8, 14
};
const int len = sizeof(A)/sizeof(A[0]);
quicksort(A, 0, len-1);
for (int a = 0; a < len; ++a)
printf("%d ", A[a]);
return 0;
}
Duplicate-elements sorting implement
At first, I coded something like this:
#include <cstdio>
int partition(int *arr, int f, int l)
{
int lo = f, hi = l;
int pivot = lo;
int pivot_val = arr[lo];
while (lo < hi)
{
while (hi > lo && arr[hi] >= pivot_val) --hi;
arr[pivot] = arr[hi]; pivot = hi;
while (lo < hi && arr[lo] <= pivot_val) ++lo;
arr[pivot] = arr[lo]; pivot = lo;
}
arr[pivot] = pivot_val;
return pivot;
}
void quick_sort(int *arr, int f, int l)
{
if (f >= l) return;
int p = partition(arr, f, l);
quick_sort(arr, f, p-1);
quick_sort(arr, p+1, l);
}
int main()
{
int arr[] = {
1, 18, 10, 6, 16, 8, 6, 2, 7, 3, 16, 21
};
const int len = sizeof(arr)/sizeof(arr[0]);
quick_sort(arr, 0, len-1);
for (int a : arr) printf("%d ", a);
printf("\n");
return 0;
}
It works, but might swap the pivot elements unnecessarily. So I add same if-statements:
--- a/src/duplicate-1.cpp
+++ b/src/duplicate-2.cpp
@@ -9,8 +9,10 @@ int partition(int *arr, int f, int l)
while (lo < hi)
{
while (hi > lo && arr[hi] >= pivot_val) --hi;
+ if (lo == hi) break;
arr[pivot] = arr[hi]; pivot = hi;
while (lo < hi && arr[lo] <= pivot_val) ++lo;
+ if (lo == hi) break;
arr[pivot] = arr[lo]; pivot = lo;
}
arr[pivot] = pivot_val;
It also passed the test.
Then I wonder if I can make it more concise. And I found out the pivot variable is unnecessary.
--- a/src/duplicate-2.cpp
+++ b/src/duplicate.cpp
@@ -4,19 +4,16 @@
int partition(int *arr, int f, int l)
{
int lo = f, hi = l;
- int pivot = lo;
int pivot_val = arr[lo];
while (lo < hi)
{
while (hi > lo && arr[hi] >= pivot_val) --hi;
- if (lo == hi) break;
- arr[pivot] = arr[hi]; pivot = hi;
+ if (lo != hi) arr[lo] = arr[hi];
while (lo < hi && arr[lo] <= pivot_val) ++lo;
- if (lo == hi) break;
- arr[pivot] = arr[lo]; pivot = lo;
+ if (lo != hi) arr[hi] = arr[lo];
}
- arr[pivot] = pivot_val;
- return pivot;
+ arr[lo] = pivot_val;
+ return lo;
}
Finally, with unit test, the final code looks like this:
#include <cstdio>
int partition(int *arr, int f, int l)
{
int lo = f, hi = l;
int pivot_val = arr[lo];
while (lo < hi)
{
while (hi > lo && arr[hi] >= pivot_val) --hi;
if (lo != hi) arr[lo] = arr[hi];
while (lo < hi && arr[lo] <= pivot_val) ++lo;
if (lo != hi) arr[hi] = arr[lo];
}
arr[lo] = pivot_val;
return lo;
}
void quicksort(int *arr, int f, int l)
{
if (f >= l) return;
int p = partition(arr, f, l);
quicksort(arr, f, p-1);
quicksort(arr, p+1, l);
}
#include <cstdlib>
#include <ctime>
#include <memory>
#define BOOST_TEST_MODULE quicksort
#include <boost/test/included/unit_test.hpp>
void print_array(int *arr, int len)
{
for (int a=0; a<len; ++a) printf("%d ", arr[a]);
printf("\n");
}
void test_once()
{
const int maxlen = 100000;
const int maxval = 1000;
const int len = rand()%(maxlen/10*9) + maxlen/10 + 1;
std::unique_ptr<int[]> arr_ptr(new int[len]);
int *arr = arr_ptr.get();
for (int a=0; a<len; ++a) arr[a] = rand()%maxval;
quicksort(arr, 0, len-1);
for (int a=1; a<len; ++a)
{
if (arr[a]<arr[a-1])
{
printf("Error!\n");
print_array(arr, len);
}
BOOST_REQUIRE(arr[a] >= arr[a-1]);
}
}
BOOST_AUTO_TEST_CASE( test_quicksort )
{
const int max_test_times = 200;
for (int a=1; a<=max_test_times; ++a) {
printf("Performing %d sub-test...\n", a);
test_once();
}
}
BOOST_AUTO_TEST_CASE( single_test )
{
int arr[] = {
1, 18, 10, 6, 16, 8, 6, 2, 7, 3, 16, 21
};
int sorted_arr[] = {
1, 2, 3, 6, 6, 7, 8, 10, 16, 16, 18, 21
};
const int len = sizeof(arr)/sizeof(arr[0]);
quicksort(arr, 0, len-1);
for (int a=0; a<len; ++a)
BOOST_REQUIRE(arr[a] == sorted_arr[a]);
printf("Result of a small sort case:\n");
for (int a : arr) printf("%d ", a);
printf("\n");
}
Output:
$ make test-duplicate
g++ src/duplicate.cpp -o bin/duplicate -std=c++11
cd bin && ./duplicate
Running 2 test cases...
Performing 1 sub-test...
Performing 2 sub-test...
Performing 3 sub-test...
Performing 4 sub-test...
Performing 5 sub-test...
...
Performing 196 sub-test...
Performing 197 sub-test...
Performing 198 sub-test...
Performing 199 sub-test...
Performing 200 sub-test...
Result of a small sort case:
1 2 3 6 6 7 8 10 16 16 18 21
*** No errors detected