import csv
from collections import defaultdict
import os

# Registry of duplicate groups: maps the absolute value of a report row's id
# column to the Dupset object that collects every file name sharing that id.
dups: dict = {}

class Dupset:
    """One group of identical files: a single file to keep plus its copies.

    State lives in ``self.items``, a ``defaultdict(set)`` that stores the name
    of the file to keep under ``'ORIGINAL'`` (a plain value, not a set) and
    the names of the remaining copies under ``'DUPLICATE'`` (a set that is
    created on first access).
    """

    def __init__(self, original):
        """Start a group whose file to keep is ``original``."""
        self.items = defaultdict(set, ORIGINAL=original)

    def add(self, duplicate):
        """Record ``duplicate`` as another copy belonging to this group."""
        copies = self.items['DUPLICATE']
        copies.add(duplicate)

    def set_original(self, name):
        """Make ``name`` the file to keep and demote the previous original.

        Does nothing when ``name`` already is the original. Raises
        ``KeyError`` when ``name`` is not currently among the duplicates.
        """
        previous = self.items['ORIGINAL']
        if name != previous:
            copies = self.items['DUPLICATE']
            # Swap roles: the chosen name leaves the duplicate set and the
            # former original takes its place there.
            copies.remove(name)
            copies.add(previous)
            self.items['ORIGINAL'] = name

    def names(self):
        """Return an iterator over all names in the group, original first."""
        ordered = [self.items['ORIGINAL']]
        ordered.extend(self.items['DUPLICATE'])
        return iter(ordered)


# --- 1. Load the duplicate report -------------------------------------------
# Each data row is space separated:
#   duptype id depth size device inode priority name
# NOTE(review): the layout and the DUPTYPE_* markers look like rdfind's
# results.txt -- confirm the producing tool.
# newline='' is what the csv module recommends for files given to csv.reader.
with open('results.txt', 'r', newline='') as f:
    # The first two lines are headers. The default keeps an empty or very
    # short report from raising StopIteration.
    next(f, None)
    next(f, None)
    # QUOTE_NONE: a double quote inside a file name must stay literal, otherwise
    # the reader strips/merges quoted parts and the name no longer matches the
    # file on disk.
    reader = csv.reader(f, delimiter=' ', quoting=csv.QUOTE_NONE)
    for row in reader:
        # Skip blank lines, '#' comment lines (rdfind ends its report with
        # "# end of file") and rows too short to carry a file name.
        if len(row) < 8 or row[0].startswith('#'):
            continue
        duptype = row[0]
        # abs() lets all rows of one group share a key; presumably the
        # non-first rows carry the id with the opposite sign -- verify against
        # the report format.
        set_id = abs(int(row[1]))
        # The name is the last column and may itself contain spaces.
        name = ' '.join(row[7:])
        if duptype == 'DUPTYPE_FIRST_OCCURRENCE':
            dups[set_id] = Dupset(name)
        else:
            dups[set_id].add(name)

# --- 2. Decide which copy to keep -------------------------------------------
# Walk every name of a group and hand the "original" role to it whenever the
# current original is one we would rather delete. Dupset.set_original() is a
# no-op when the candidate already is the original, so a single combined
# condition behaves like three consecutive checks.
for dupset in dups.values():
    for name in dupset.names():
        original = dupset.items['ORIGINAL']
        # splitext() looks only at the final path component's extension, so
        # names without any dot no longer raise IndexError and dots in
        # directory names are not mistaken for the extension separator.
        stem = os.path.splitext(original)[0]
        numbered_copy = (
            stem.endswith('-2')
            and 'Google Photos Backup' not in name
            and '-3' not in name
        )
        unwanted_location = 'Diplomauitreiking' in original or 'Exported' in original
        if numbered_copy or unwanted_location:
            dupset.set_original(name)

# --- 3. Show the plan and ask for confirmation ------------------------------
print('Scheduled changes:')
for dupset in dups.values():
    # Sorted so the preview is stable between runs (sets have no fixed order).
    doomed = ', '.join(sorted(dupset.items['DUPLICATE']))
    print(f"Keep: {dupset.items['ORIGINAL']}\tDelete: {doomed}")

ok = input('Agree? y/N: ')
if ok.strip().lower() != 'y':
    # Same effect as sys.exit(): stop here with exit status 0, delete nothing.
    raise SystemExit

# --- 4. Delete the duplicates -----------------------------------------------
for dupset in dups.values():
    for duplicate in dupset.items['DUPLICATE']:
        print(f'Unlinking {duplicate}...')
        try:
            os.unlink(duplicate)
        except FileNotFoundError:
            # Best effort: a file that is already gone is not an error, but
            # say so instead of hiding it.
            print(f'  {duplicate} not found, skipping')