#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Find a ``wgdaten`` Access database on the local drives and export its tables as CSV.

Without ``--file`` all local drives are scanned for ``*.accdb`` files, identical
files (same MD5) are grouped, and the user is asked to pick one of the
``wgdaten.accdb`` candidates.  Every table of the chosen database is then
written to ``<output>/<table>.csv``.

Windows only: drive detection uses ``mpr.dll`` and the export uses the
Microsoft Access ODBC driver.
"""

from typing import Tuple, List, Generator, Dict, Any, Optional
import argparse
import os
import string
import datetime
import ctypes
import ctypes.wintypes
import hashlib
import itertools

import pypyodbc

import utils


IGNORED_NAMES = ['Windows', 'Program Files', 'Program Files (x86)', 'AppData']

# File name (lower case) of the databases offered by prompt().
WANTED_NAME = 'wgdaten.accdb'

# Sub-directories are only searched when their parent has at most this many entries.
MAX_DIR_ENTRIES = 100

# Column at which file locations are printed in the prompt() listing.
LOCATION_COLUMN = 29

# Win32 ERROR_MORE_DATA: WNetGetConnectionW returns it for a network drive when
# the supplied buffer (here: none at all) is too small for the remote name.
_ERROR_MORE_DATA = 0xEA


def get_drives() -> List[str]:
    """Return the letters of all existing drives that are not network connections."""
    # only return local file systems
    length = (ctypes.wintypes.DWORD * 1)()
    mpr = ctypes.WinDLL('mpr')
    return [
        d for d in string.ascii_uppercase
        if os.path.exists(f'{d}:')
        and mpr.WNetGetConnectionW(f'{d}:', None, length) != _ERROR_MORE_DATA
    ]


def search_accdb(base_path: Optional[str] = None) -> Generator[str, None, None]:
    """Yield the paths of all ``.accdb`` files below *base_path*.

    With ``base_path=None`` every drive returned by :func:`get_drives` is
    searched.  Entries listed in ``IGNORED_NAMES`` are skipped, symlinked
    directories are not followed, and the sub-directories of a directory with
    more than ``MAX_DIR_ENTRIES`` entries are not descended into (files directly
    inside it are still reported).  Directories and entries that cannot be
    read are skipped.
    """
    if base_path is None:
        for drive in get_drives():
            yield from search_accdb(f'{drive}:')
        return

    try:
        with os.scandir(f'{base_path}\\') as scan:
            entries = list(scan)
    except OSError:
        # Best-effort scan: unreadable or vanished directories are ignored.
        return

    descend = len(entries) <= MAX_DIR_ENTRIES
    for entry in entries:
        if entry.name in IGNORED_NAMES:
            continue
        try:
            is_db = entry.name.lower().endswith('.accdb') and entry.is_file()
            is_subdir = (not is_db and descend
                         and entry.is_dir() and not entry.is_symlink())
        except OSError:
            # A single broken entry must not end the scan of this directory.
            continue
        path = f'{base_path}\\{entry.name}'
        if is_db:
            yield path
        elif is_subdir:
            yield from search_accdb(path)


def get_accdb_info(filename: str) -> Tuple[str, datetime.datetime, datetime.datetime, int]:
    """Return ``(md5 hex digest, ctime, mtime, size in bytes)`` of *filename*.

    Raises:
        OSError: if the file cannot be stat'ed or read.
    """
    stat = os.stat(filename)
    with open(filename, 'rb', buffering=0) as f:
        filehash = hashlib.file_digest(f, 'md5').hexdigest()
    return (
        filehash,
        datetime.datetime.fromtimestamp(stat.st_ctime),
        datetime.datetime.fromtimestamp(stat.st_mtime),
        stat.st_size,
    )


def get_accdb_files() -> Dict[str, Dict[str, Any]]:
    """Collect all ``.accdb`` files found by :func:`search_accdb`, grouped by MD5 hash.

    Each value holds the keys ``hash``, ``size``, ``locations`` (list of full
    paths), ``names`` (set of lower-cased file names), ``ctime`` (earliest
    ctime of all copies) and ``mtime`` (latest mtime of all copies).
    Files that cannot be read are left out.
    """
    files: Dict[str, Dict[str, Any]] = {}
    for filename in search_accdb():
        try:
            filehash, ctime, mtime, size = get_accdb_info(filename)
        except OSError:
            # e.g. locked by another process or removed since the scan
            continue
        if filehash not in files:
            files[filehash] = {
                'hash': filehash,
                'size': size,
                'locations': [],
                'names': set(),
                'ctime': ctime,
                'mtime': mtime,
            }
        fh = files[filehash]
        fh['locations'].append(filename)
        fh['names'].add(filename.split('\\')[-1].lower())
        if mtime > fh['mtime']:
            fh['mtime'] = mtime
        if ctime < fh['ctime']:
            fh['ctime'] = ctime
    return files


def prompt() -> str:
    """List all ``wgdaten.accdb`` databases found and let the user choose one.

    Candidates are listed newest first (by modification time); an empty input
    selects the first one.

    Returns:
        The first known location of the chosen database.

    Raises:
        SystemExit: if no candidate was found, since there is nothing to choose from.
    """
    print('Searching for accdb files...', flush=True)
    files = get_accdb_files()
    # [:-6] strips the '.accdb' extension for display
    name = WANTED_NAME[:-6]

    file_map = {}
    n = 0
    for info in sorted(files.values(), key=lambda f: f['mtime'], reverse=True):
        # 'names' is a set, so test membership instead of looking at an arbitrary element
        if WANTED_NAME not in info['names']:
            continue
        n += 1
        file_map[n] = info
        labels = [
            f'{n:>2}: {name:<24}',
            f'    Created:  {info["ctime"].date().isoformat()}',
            f'    Modified: {info["mtime"].date().isoformat()}',
        ]
        # One location per row; rows beyond the three labels only show a location.
        for label, loc in itertools.zip_longest(labels, info['locations'], fillvalue=''):
            print(f'{label:<{LOCATION_COLUMN - 1}} {loc}'.rstrip())
        print()

    if n == 0:
        # Without this the selection loop below could never terminate.
        raise SystemExit(f'No {WANTED_NAME} file found. Use --file to specify one.')

    while True:
        try:
            text = input('Choose accdb file (default=1): ')
            choice = 1 if text == '' else int(text)
            if 1 <= choice <= n:
                break
        except ValueError:
            # not a number, ask again
            pass

    return file_map[choice]['locations'][0]


def main() -> None:
    """Parse the command line and export every table of the database to CSV.

    Options: ``-o/--output`` is the target directory (default ``tables``,
    created if missing); ``-f/--file`` is the database to open.  If no file
    is given, :func:`prompt` is used to choose one.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', '--output', default='tables')
    parser.add_argument('-f', '--file', metavar='WGDATEN', required=False)
    args = parser.parse_args()

    os.makedirs(args.output, exist_ok=True)

    if not args.file:
        args.file = prompt()

    print(f'Opening {args.file}...', flush=True)
    # NOTE(review): presumably keeps the original case of column names; confirm against pypyodbc
    pypyodbc.lowercase = False
    cnx = pypyodbc.connect(f"Driver={{Microsoft Access Driver (*.mdb, *.accdb)}};Dbq={args.file};")
    try:
        cur = cnx.cursor()
        try:
            print(f'Opened {args.file}!', flush=True)
            print('Fetching tables...', flush=True)
            tbls = cur.tables(tableType='TABLE').fetchall()
            print(f'Successfully fetched {len(tbls)} tables!', flush=True)

            for _, _, t_name, _, _ in tbls:
                print(f'Exporting {t_name}...', flush=True)
                # Table names are quoted like the column below so that names
                # with spaces or reserved words work as well.
                cur.execute(f"SELECT TOP 1 * FROM `{t_name}`;")
                desc = [(t[0], t[1]) for t in cur.description]
                cur.fetchall()
                print(desc, flush=True)

                # Order by the first column for a stable row order in the export.
                cur.execute(f"SELECT * FROM `{t_name}` ORDER BY `{desc[0][0]}`;")
                cols = [t[0] for t in cur.description]

                with utils.csv_open(f'{args.output}/{t_name}.csv') as f:
                    f.header(*cols)
                    for row in cur:
                        values = (utils.convert_value(val, table=t_name, column=col)
                                  for col, val in zip(cols, row))
                        f.row(*values, raw=True)

                print(f'Exported {t_name} successfully!', flush=True)
        finally:
            cur.close()
    finally:
        cnx.close()


if __name__ == '__main__':
    main()