Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

#!/usr/bin/env python3 

 

import itertools 

import contextlib 

import datetime 

import tempfile 

import ofxparse 

import os 

 

from selenium import webdriver 

from selenium.webdriver.common.keys import Keys 

from selenium.webdriver.support.ui import Select 

from selenium.common.exceptions import NoSuchElementException 

 

# Debug Mode 

# ========== 

# 1. Show GUI 

# 2. Save OFX files to a non-temporary file.

# 3. Print out more status updates. 

 

@contextlib.contextmanager
def firefox_driver(download_dir, gui=False, max_load_time=10):
    """Yield a Selenium Firefox driver set up for unattended downloads.

    Args:
        download_dir: Directory Firefox is told to save downloads into.
        gui: If false (the default), Firefox is started inside an X virtual
            frame buffer (Xvfb), stylesheet and image loading are turned
            off, and the browser is closed when the context exits.  If
            true, the browser is shown and is left open on exit.
        max_load_time: Value passed to ``driver.implicitly_wait()``.

    Yields:
        The ``webdriver.Firefox`` instance.

    Cleanup runs in a ``finally`` clause, so the browser and the virtual
    display are released even when the body of the ``with`` statement (or
    the driver start-up itself) raises; the exception then propagates.
    """
    xvfb = None
    driver = None

    try:
        # If the GUI was not explicitly requested, use the X virtual frame
        # buffer (Xvfb) to gobble it.  xvfbwrapper is only imported when it
        # is actually needed, so gui=True works without it installed.
        if not gui:
            from xvfbwrapper import Xvfb

            display = Xvfb()
            display.start()
            # Only remember the display once it has really been started, so
            # the cleanup below never stops a display that is not running.
            xvfb = display

        # Change some of Firefox's default preferences.  In particular,
        # configure it to automatically download files without asking
        # questions.
        profile = webdriver.FirefoxProfile()
        profile.set_preference('browser.download.folderList', 2)
        profile.set_preference(
            'browser.download.manager.showWhenStarting', False)
        profile.set_preference('browser.download.dir', download_dir)
        profile.set_preference(
            'browser.helperApps.neverAsk.saveToDisk',
            'application/vnd.intu.QFX')

        # If the GUI is disabled, don't bother downloading CSS or images.
        if not gui:
            profile.set_preference('permissions.default.stylesheet', 2)
            profile.set_preference('permissions.default.image', 2)
            profile.set_preference(
                'dom.ipc.plugins.enabled.libflashplayer.so', 'false')

        # Construct and yield a Firefox driver.
        driver = webdriver.Firefox(profile)
        driver.implicitly_wait(max_load_time)

        yield driver

    finally:
        # If the GUI is disabled, close the browser as soon as the scraping
        # is complete (or has failed).  With the GUI enabled the browser is
        # deliberately left open.
        if not gui:
            try:
                if driver is not None:
                    driver.close()
            finally:
                # Stop the display even if closing the browser failed.
                if xvfb is not None:
                    xvfb.stop()

 

 

class WellsFargo:
    """Scraper that downloads account activity from Wells Fargo's website.

    The scraper logs in with the given credentials, downloads the activity
    of every account offered on the download page in OFX format, and parses
    the downloaded files with ``ofxparse``.
    """

    def __init__(self, username, password, gui=False):
        """Store the login credentials.

        Args:
            username: Value typed into the site's ``userid`` field.
            password: Value typed into the site's ``password`` field.
            gui: Passed through to ``firefox_driver`` as its ``gui`` flag.
        """
        self.username = username
        self.password = password
        self.gui = gui

    def download(self, from_date=None, to_date=None):
        """Download and parse account activity for a date range.

        Args:
            from_date: First day of the range (an object with ``strftime``,
                e.g. ``datetime.date``).  Defaults to 30 days before
                ``to_date``.
            to_date: Last day of the range.  Defaults to today.

        Returns:
            A list holding the ``accounts`` of every parsed OFX file.
        """
        # Create a temporary directory that the scraper can download all the
        # financial data into.  It is removed when the block exits.
        with tempfile.TemporaryDirectory() as ofx_dir:
            # Download financial data from Wells Fargo, then parse it and
            # collect the accounts found in each file.
            self._scrape(ofx_dir, from_date, to_date)
            return self._parse(ofx_dir)

    @staticmethod
    def _date_range(from_date=None, to_date=None):
        """Return ``(from, to)`` as ``mm/dd/yy`` strings, applying defaults.

        ``to_date`` defaults to today and ``from_date`` to 30 days before
        ``to_date``.
        """
        if to_date is None:
            to_date = datetime.date.today()
        if from_date is None:
            from_date = to_date - datetime.timedelta(30)

        return from_date.strftime('%m/%d/%y'), to_date.strftime('%m/%d/%y')

    def _scrape(self, ofx_dir, from_date=None, to_date=None):
        """Drive Firefox through the site and download OFX files.

        Args:
            ofx_dir: Directory the browser downloads into.
            from_date: Start of the range; see ``download``.
            to_date: End of the range; see ``download``.
        """
        from_date, to_date = self._date_range(from_date, to_date)

        with firefox_driver(ofx_dir, gui=self.gui) as driver:

            # Login to Wells Fargo's website.
            driver.get('https://www.wellsfargo.com/')

            username_form = driver.find_element_by_id('userid')
            password_form = driver.find_element_by_id('password')
            username_form.send_keys(self.username)
            password_form.send_keys(self.password)
            password_form.submit()

            # Go to the "Account Activity" page.
            driver.find_element_by_link_text("Account Activity").click()

            # Go to the "Download" page.
            driver.find_element_by_link_text(
                "Download Account Activity").click()

            # Download account activity in the OFX format.  The number of
            # accounts is not known up front, so keep selecting the next
            # index until the drop-down has no such option.
            for i in itertools.count():

                # Pick the next account to download.  The element is looked
                # up again on every pass rather than cached.
                accounts = driver.find_element_by_name('primaryKey')
                try:
                    Select(accounts).select_by_index(i)
                except NoSuchElementException:
                    break
                driver.find_element_by_name("Select").click()

                # Pick the date range to download.
                driver.find_element_by_id('fromDate').clear()
                driver.find_element_by_id('toDate').clear()
                driver.find_element_by_id('fromDate').send_keys(from_date)
                driver.find_element_by_id('toDate').send_keys(to_date)

                # Download it.
                driver.find_element_by_id('quickenOFX').click()
                driver.find_element_by_name('Download').click()

    def _parse(self, ofx_dir):
        """Parse every file in ``ofx_dir`` and return the accounts found.

        Args:
            ofx_dir: Directory containing the downloaded OFX files.

        Returns:
            A list of the ``accounts`` of each parsed file; empty when the
            directory is empty.
        """
        accounts = []

        # os.listdir() returns entries in arbitrary order; sort them so the
        # order of the returned accounts is reproducible.
        for ofx_name in sorted(os.listdir(ofx_dir)):
            ofx_path = os.path.join(ofx_dir, ofx_name)
            with open(ofx_path, 'rb') as ofx_file:
                ofx = ofxparse.OfxParser.parse(ofx_file)
            accounts += ofx.accounts

        return accounts

 

 

class ScrapingError(Exception):
    """Error type for scraping failures.

    Derives from ``Exception`` so that it can actually be raised and caught;
    a plain class cannot be used in a ``raise`` statement.  Nothing in the
    visible code raises it yet.

    Attributes:
        message: The message passed to the constructor.
    """

    def __init__(self, message):
        # Hand the message to Exception as well, so str(error) and
        # tracebacks show it.
        super().__init__(message)
        self.message = message

 

 

 

if __name__ == '__main__':
    # Demo entry point: scrape with placeholder credentials, without showing
    # the browser, and pretty-print whatever download() returns.
    from pprint import pprint

    scraper = WellsFargo('username', 'password', gui=False)
    downloaded_accounts = scraper.download()
    pprint(downloaded_accounts)