15. 3Sum,三数和

Last Updated: 2023-11-07 11:24:34 Tuesday

-- TOC --

中等难度的题目,很容易TLE,要见路不走...

题目分析

Given an integer array nums, return all the triplets [nums[i], nums[j], nums[k] such that i != j, i != k, and j != k, and nums[i] + nums[j] + nums[k] == 0. Notice that the solution set must not contain duplicate triplets.

给出一个int array,返回list,里面包含所有的满足条件的三元组(也是list),其中每个元素来自不同的index(但元素的值可能相同),并且相加之和等于0。

Example 1:
Input: nums = [-1,0,1,2,-1,-4]
Output: [[-1,-1,2],[-1,0,1]]
Explanation: 
nums[0] + nums[1] + nums[2] = (-1) + 0 + 1 = 0.
nums[1] + nums[2] + nums[4] = 0 + 1 + (-1) = 0.
nums[0] + nums[3] + nums[4] = (-1) + 2 + (-1) = 0.
The distinct triplets are [-1,0,1] and [-1,-1,2].
Notice that the order of the output and the order of the triplets does not matter.

Example 2:
Input: nums = [0,1,1]
Output: []
Explanation: The only possible triplet does not sum up to 0.

Example 3:
Input: nums = [0,0,0]
Output: [[0,0,0]]
Explanation: The only possible triplet sums up to 0.

返回triplet的顺序,以及三元组内部的顺序,都无所谓。但不能包含元素完全重复的triplet。

Constraints:

思路

(1)找出所有三元组,然后根据条件过滤

我有专门研究过如何找出所有子集,其方法可以用来找出所有三元组。但经过测试,这种方法在应对此题时,非常的慢,TLE,不适合。

(2)找出所有二元组的和,通过hash表确定第3个数,按条件过滤最后结果

用这个思路写出的Python代码,可以提交,但还是慢,排名倒数。后文有代码。

(3)确定一个数,用Two Pointer的方法找出另外两个数,此问题变成了两数和!

这是本题的技能点。用Two Pointer的方法找出两个数,虽然是\(O(N)\)级别,但前提是已排序。在无序的情况下找出两个数,\(O(N^2)\)。各有各的代价,不过在Python中排序,可以享受C的速度。对于本题,一次排序,终生享用。

思路2(Python)

class Solution:
    def threeSum(self, nums: List[int]) -> List[List[int]]:
        n = len(nums)
        dnums = {v:i for i,v in enumerate(nums)}
        r = []
        for i in range(n):
            for j in range(i+1,n):
                k = 0 - nums[i] - nums[j]
                if k in dnums and dnums[k]>j:
                    a = sorted((nums[i],nums[j],k))
                    if a not in r:
                        r.append(a)
        return r

由于无法确定找出来的三个数的顺序,因此只能用先排序后做membership检测的方法,来去重。也可以设置r为set,sorted后转tuple,再add,但这样似乎要慢一些,list是很快的。

确定一个数后求两数和

Python

class Solution:
    def threeSum(self, nums: list[int]) -> list[list[int]]:
        nums.sort()
        rtv = []
        size = len(nums)
        i = 0
        while i < size-2:
            if nums[i] > 0:  # target=0
                break
            if i>0 and nums[i]==nums[i-1]:
                i += 1
                continue
            t = -nums[i]
            lo = i + 1
            hi = size - 1
            while lo < hi:
                s = nums[lo] + nums[hi]
                if s<t or (lo>i+1 and nums[lo]==nums[lo-1]):
                    lo += 1
                    continue
                if s>t or (hi<size-1 and nums[hi]==nums[hi+1]):
                    hi -= 1
                    continue
                rtv.append([nums[i],nums[lo],nums[hi]])
                lo += 1
                hi -= 1
            i += 1
        return rtv

C++

class Solution {
public:
    vector<vector<int>> threeSum(vector<int>& nums) {
        sort(nums.begin(), nums.end());
        vector<vector<int>> r;
        r.reserve(64);
        size_t size { nums.size() };
        for(size_t i{}; i<size-2; ++i){
            if(nums[i] > 0)  // target=0
                break;
            if((i>0) && (nums[i]==nums[i-1]))
                continue;
            int t { -nums[i] };
            size_t lo { i+1 };
            size_t hi { size-1 };
            while(lo < hi){
                int s { nums[lo]+nums[hi] };
                if(s<t || (lo>i+1 && nums[lo]==nums[lo-1])){
                    ++lo;
                    continue;
                }
                if(s>t || (hi<size-1 && nums[hi]==nums[hi+1])){
                    --hi;
                    continue;
                }
                r.push_back({nums[i],nums[lo],nums[hi]});
                ++lo;
                --hi;
            }
        }
        return r;
    }
};

C

left改lo,right改hi,去重逻辑优化....不改了....留着代码学习realloc。

static int comp(const void *a, const void *b){
    int ia = *(int*)a;
    int ib = *(int*)b;
    // return ia<ib ? -1 : ia>ib ? 1 : 0;
    // no overflow according to constraints
    return ia - ib;
}

/**
 * Return an array of arrays of size *returnSize.
 * The sizes of the arrays are returned as *returnColumnSizes array.
 * Note: Both returned array and *columnSizes array must be malloced, assume caller calls free().
 */
int** threeSum(int* nums, int numsSize, int* returnSize, int** returnColumnSizes){
    qsort(nums, numsSize, sizeof(int), comp);
    int **rtv = NULL;
    *returnSize = 0;
    for(int i=0; i<numsSize-2; ++i){
        if(nums[i] > 0)
            break;
        if((i>0) && nums[i]==nums[i-1])
            continue;
        int t = -nums[i];
        int left = i + 1;
        int right = numsSize - 1;
        while(left < right){
            int s = nums[left] + nums[right];
            if(s == t){
                int idx = (*returnSize)++;
                if(idx%64 == 0)  // expand every 64 items
                    rtv = realloc(rtv, 64*(idx/64+1)*sizeof(int*));
                rtv[idx] = (int*)malloc(sizeof(int)*3);
                rtv[idx][0] = nums[i];
                rtv[idx][1] = nums[left];
                rtv[idx][2] = nums[right];
                while((left<right) && (nums[left]==nums[left+1]))
                    ++left;
                while((left<right) && (nums[right]==nums[right-1]))
                    --right;
                ++left;
                --right;
                continue;
            }
            if(s < t)
                ++left;
            else
                --right;
        }
    }

    rtv = realloc(rtv, sizeof(int*)*(*returnSize));
    *returnColumnSizes = (int*)malloc(sizeof(int)*(*returnSize));
    for(int j=0; j<*returnSize; ++j)
        (*returnColumnSizes)[j] = 3;
    return rtv;
}

统计后排序和Hash查找

学习了高手的代码后,领悟了这一算法思路。此题可以分成以下几种情况:

  1. 三个0
  2. 一个0,另外两个数一正一负,绝对值相等
  3. 没有0,但存在两个值相同的元素
  4. 没有0,每个元素的值都不同(去重后排序查找,需有Hashtable加持,不用two pointer)

既然要数有多少个0,就可以考虑使用Python标准库中的Counter对象了。它能直接得到一个类似dict的对象,统计了每个元素出现的次数。比较巧妙的地方是,使用Counter即统计了次数,还完成了去重,得到了一个Hashtable(当然,自己写代码实现也不难,但思路首先要到位)。如果再排个序,就可以在Hashtable和有序不重复序列的情况下,使用更快的算法,来完成第4种情况的计算。Two Pointer算法在有序序列上查找满足条件的两个数很快,但如果已经有了一个Hashtable,可以不用Two Pointer算法,速度可能更快。

满足了这三个条件:(1)排序序列,(2)不重复,(3)存在一个Hashtable。算法思路:通过在排序序列上使用两次二分查找,找到这样一个区间,当给定一个两数和时,此区间内必然存在一个数,遍历此区间,用hashtable取判断是否存在另一个数。找出的区间可能为空,此时,算法复杂度就从\(O(N)\)降到了\(O(\log{N})\)。

对于target不等于0的情况,此思路也可应用...

Python

class Solution:
    def threeSum(self, nums: list[int]) -> list[list[int]]:
        r = []
        ncd = Counter(nums)
        # three zeros
        if ncd[0] >= 3:
            r.append([0,0,0])
        nsorted = sorted(ncd)
        # one zero
        if ncd[0]:
            nsorted.remove(0)
            for n in nsorted:
                if n > 0:
                    break
                if ncd[-n]:
                    r.append([0,n,-n])
        # two identicals without zero
        for n in nsorted:
            if n % 2:
                continue
            m = -n // 2
            if ncd[m] >= 2:
                r.append([n,m,m])
        # different three without zero
        for i in range(len(nsorted)):
            if nsorted[i] > 0:
                break
            t = -nsorted[i]
            rmax = (t-1) // 2
            ridx = bisect_right(nsorted, rmax)
            lmin = t - nsorted[-1]
            lidx = bisect_left(nsorted, lmin, i+1)
            for a in nsorted[lidx:ridx]:
                b = t - a
                if ncd[b]:
                    r.append([nsorted[i],a,b])        
        return r
        """ Two Pointer
            left = i + 1
            right = len(nsorted) - 1
            while left < right:
                s = nsorted[left] + nsorted[right]
                if s == t:
                    r.append([nsorted[i],nsorted[left],nsorted[right]])
                    left += 1
                    right -= 1
                    continue
                if s < t:
                    left += 1
                else:
                    right -= 1
        return r
        """

C++

class Solution {
public:
    vector<vector<int>> threeSum(vector<int>& nums) {
        map<int,int> sc;
        for(auto &it: nums)
            ++sc[it];

        vector<vector<int>> r;
        r.reserve(64);

        // three zeros
        if(sc.find(0)!=sc.end() && sc[0] >= 3)
            r.push_back({0,0,0});

        // sorted no-duplicated keys with random access
        vector<int> scn;
        scn.reserve(sc.size());
        // one zero
        if(sc.find(0)!=sc.end() && sc[0]){
            sc.erase(0);
            // construct scn without zero 1
            for(auto it{sc.begin()}; it!=sc.end(); ++it)
                scn.push_back(it->first);
            for(size_t i{}; i<scn.size(); ++i){
                if(scn[i] > 0)
                    break;
                if(sc.find(-scn[i]) != sc.end())
                    r.push_back({0,scn[i],-scn[i]});
            }
        }
        // construct scn without zero 2
        if(scn.size() == 0)
            for(auto &[k,_]: sc)
                scn.push_back(k);

        // no zero, two identicals
        for(size_t i{}; i<scn.size(); ++i){
            if(scn[i] & 1)
                continue;
            int a { -scn[i]/2 };
            if(sc.find(a)!=sc.end() && sc[a]>=2)
                r.push_back({scn[i], a, a});
        }

        // no zero, no identicals
        for(size_t i{}; i<scn.size(); ++i){
            int t { -scn[i] };
            int rmax { (t-1)/2 };
            ssize_t ridx { upper_bound(scn.begin(),scn.end(),rmax)-scn.begin() };
            int lmin { t-scn.back() };
            ssize_t lidx { lower_bound(scn.begin(),scn.end(),lmin)-scn.begin() };
            lidx = max((size_t)lidx, i+1);
            for(ssize_t j{lidx}; j<ridx; ++j){
                int a { t-scn[j] };
                if(sc.find(a)!=sc.end())
                    r.push_back({scn[i],scn[j],a});
            }
            /*auto left { i+1 };
            auto right { scn.size()-1 };
            while(left < right){
                int s { scn[left] + scn[right] };
                if(s == t){
                    r.push_back({scn[i],scn[left],scn[right]});
                    ++left;
                    --right;
                    continue;
                }
                if(s < t)
                    ++left;
                else
                    --right;
            }*/
        }

        r.shrink_to_fit();
        return r;
    }
};

C

第2次使用uthash,这是LeetCode允许的。uthash用起来还是挺方便的。(第1次尝试uthash,是LeetCode第1题,两数和

C没有C++的lower_bound和upper_bound接口,其标准库中的bsearch,只能得到有或没有的结果,因此下面的代码,最后一步,就用two pointer的思路了。

static int comp(const void *a, const void *b){
    // return ia<ib ? -1 : ia>ib ? 1 : 0;
    // no overflow according to constraints
    return *(int*)a - *(int*)b;
}

struct counter{
    int val;
    int count;
    UT_hash_handle hh;
};

/**
 * Return an array of arrays of size *returnSize.
 * The sizes of the arrays are returned as *returnColumnSizes array.
 * Note: Both returned array and *columnSizes array must be malloced, assume caller calls free().
 */
int** threeSum(int* nums, int numsSize, int* returnSize, int** returnColumnSizes){
    // construct nsorted without zero and htab
    int *nsorted = malloc(sizeof(int)*numsSize);
    int nsortedSize = 0;
    struct counter *pnums = malloc(sizeof(struct counter)*numsSize);
    int pnidx = 0;
    struct counter *htab = NULL;
    struct counter *sr;
    for(int i=0; i<numsSize; ++i){
        HASH_FIND_INT(htab, nums+i, sr);
        if(!sr){
            pnums[pnidx].val = nums[i];
            pnums[pnidx].count = 1;
            HASH_ADD_INT(htab, val, pnums+pnidx);
            ++pnidx;
            if(nums[i] == 0)
                continue;
            nsorted[nsortedSize++] = nums[i];
        } else ++(sr->count);
    }
    qsort(nsorted, nsortedSize, sizeof(int), comp);

    // init
    int **rtv = malloc(sizeof(int*)*64);
    int idx = 0;

    // three zeros
    int zero = 0;
    HASH_FIND_INT(htab, &zero, sr);
    if(sr && sr->count>=3){
        rtv[idx] = malloc(sizeof(int)*3);
        rtv[idx][0] = 0;
        rtv[idx][1] = 0;
        rtv[idx][2] = 0;
        ++idx;
    }

    // one zero, reuse sr
    if(sr)
        for(int i=0; i<nsortedSize; ++i){
            if(nsorted[i] > 0)
                break;
            int t = -nsorted[i];
            struct counter *sr2;
            HASH_FIND_INT(htab, &t, sr2);
            if(sr2){
                if(idx%64 == 0)
                    rtv = realloc(rtv, sizeof(int*)*(idx/64+1)*64);
                rtv[idx] = malloc(sizeof(int)*3);
                rtv[idx][0] = 0;
                rtv[idx][1] = nsorted[i];
                rtv[idx][2] = t;
                ++idx;
            }
        }

    // no zero, but two same
    for(int i=0; i<nsortedSize; ++i){
        if(nsorted[i] & 1)
            continue;
        int t = -nsorted[i] / 2;
        struct counter *sr2;
        HASH_FIND_INT(htab, &t, sr2);
        if(sr2 && sr2->count>=2){
            if(idx%64 == 0)
                rtv = realloc(rtv, sizeof(int*)*(idx/64+1)*64);
            rtv[idx] = malloc(sizeof(int)*3);
            rtv[idx][0] = nsorted[i];
            rtv[idx][1] = t;
            rtv[idx][2] = t;
            ++idx;
        }
    }

    HASH_CLEAR(hh, htab);
    free(pnums);

    // no zero, all different, two pointer
    for(int i=0; i<nsortedSize-2; ++i){
        if(nsorted[i] > 0)
            break;
        int t = -nsorted[i];
        int left = i + 1;
        int right = nsortedSize - 1;
        while(left < right){
            int s = nsorted[left] + nsorted[right];
            if(s == t){
                if(idx%64 == 0)
                    rtv = realloc(rtv, sizeof(int*)*(idx/64+1)*64);
                rtv[idx] = malloc(sizeof(int)*3);
                rtv[idx][0] = nsorted[i];
                rtv[idx][1] = nsorted[left];
                rtv[idx][2] = nsorted[right];
                ++idx;
                ++left;
                --right;
                continue;
            }
            if(s < t)
                ++left;
            else
                --right;
        }
    }
    free(nsorted);
    rtv = realloc(rtv, sizeof(int*)*idx); // shrink

    *returnSize = idx;
    *returnColumnSizes = (int*)malloc(sizeof(int)*(*returnSize));
    for(int j=0; j<*returnSize; ++j)
        (*returnColumnSizes)[j] = 3;
    return rtv;
}

Backtrack(回溯)

这是此类问题的通用解法,我已经总结一篇笔记:《从2Sum到kSum》

在此题中测试回溯算法,速度并不能排到很靠前,但Memory占用非常少,几乎是全网最少的!三数和target为0,应该是一种特殊情况,此题最快的解法,并不具有通用性。

本文链接:https://cs.pynote.net/ag/leetcode/202310011/

-- EOF --

-- MORE --