Access JumpStart 2.0 | Blog

A Rapid Development Framework for Microsoft Access

I’ll continue refactoring today, here’s where I ended up in yesterday’s message. It’s the main function that’s been split up into 1 helper function to test the Hash and make sure it’s available, otherwise display a message box. And a new class that holds information about the command string and returns some pertinent info about it.

Function PS_GetFileHashes(aFiles As Variant, sHashAlgorithm As String) As Scripting.Dictionary
    Dim LenCmd As Long, aIdx As Long
    Dim retVal As New Scripting.Dictionary
    Dim sPSCmd As String, itm As Variant, sPathsPart As String, sPreviousPart As String, aOutput As Variant, itmParts As Variant
    
    On Error GoTo Error_Handler
    If AlgorithimIsUnknown(sHashAlgorithm) Then GoTo Error_Handler_Exit
    
    Dim PScmd As New PS_FileHashesCommand
    LenCmd = PScmd.LenCmdWithoutPaths(sHashAlgorithm)
    ' Need to split aFiles into manageable segments.  515 files overloaded the PS_GetOutput in this case
    ' Maybe get every 50 - 100 files at a time.
    ' Looks like 32656 characters is near the limit, 32766 is over the limit 32768 is
    aIdx = LBound(aFiles)
    Do While aIdx <= UBound(aFiles)
        sPathsPart = ""
        
        Do
            sPreviousPart = sPathsPart
            sPathsPart = sPathsPart & HALutil.WrapText(aFiles(aIdx), "'") & ","
            aIdx = aIdx + 1
            If aIdx >= UBound(aFiles) Then Exit Do
        Loop Until Len(sPathsPart) - 1 + LenCmd > 32656
        
        ' We left because either we reached the end of the array or we exceeded the limit.
        ' If we exceeded the limit then we need to back one element off of sPathsPart and the aIdx
        If Len(sPathsPart) - 1 + LenCmd > 32656 Then
            aIdx = aIdx - 1
            sPathsPart = sPreviousPart
        End If
        'Remove the comma from the end if there, if empty, use *.*
        If Len(sPathsPart) > 0 Then sPathsPart = Left(sPathsPart, Len(sPathsPart) - 1) Else sPathsPart = "'*.*'"
        ' Build the PowerShell command string
        sPSCmd = PScmd.BuildFileHashesCommand(sPathsPart, sHashAlgorithm)
        aOutput = Split(CStr(PS_GetOutput(sPSCmd)), vbCrLf)
        ' Add the output for filenames / hashes to the dictionary to return
        For Each itm In aOutput
            itmParts = Split(itm, "|")
            If UBound(itmParts) > 0 Then retVal.Add itmParts(0), itmParts(1)
        Next itm
    Loop
    Set PS_GetFileHashes = retVal

Error_Handler_Exit:
    On Error Resume Next
    Exit Function
'Get-ChildItem -Path '.\*.xls','.\*.csv' -Recurse -File | Get-FileHash -Algorithm SHA256
Error_Handler:
    MsgBox "The following error has occured" & vbCrLf & vbCrLf & _
           "Error Number: " & Err.Number & vbCrLf & _
           "Error Source: PS_GetFileHash" & vbCrLf & _
           "Error Description: " & Err.Description & _
           Switch(Erl = 0, "", Erl <> 0, vbCrLf & "Line No: " & Erl) _
           , vbOKOnly + vbCritical, "An Error has Occured!"
           Resume Next
    Resume Error_Handler_Exit
End Function

Private Function AlgorithimIsUnknown(sHashAlgorithm As String) As Boolean
    Dim retVal As Boolean: retVal = True
    Select Case sHashAlgorithm
        Case "MACTripleDES", "MD5", "RIPEMD160", "SHA1", "SHA256", "SHA384", "SHA512"
        Case Else
            MsgBox "Unknown Algorithm", vbCritical Or vbOKOnly, "Operation Aborted"
            retVal = False
    End Select
    AlgorithimIsUnknown = retVal
End Function
' Class PS_FileHashesCommand
Option Compare Database
Option Explicit

Const PS_Cmd_Beg As String = "Get-ChildItem -Path "
Const PS_Cmd_Mid As String = " -File | Get-FileHash -Algorithm "
Const PS_Cmd_End As String = " | ForEach { $_.Path + '|' + $_.Hash }"

Public Function BuildFileHashesCommand(sPathsPart As String, sHashAlgorithm As String) As String
    Dim retVal As String: retVal = ""
    retVal = PS_Cmd_Beg & sPathsPart & PS_Cmd_Mid & sHashAlgorithm & PS_Cmd_End
    
    BuildFileHashesCommand = retVal
End Function

Public Function LenCmdWithoutPaths(sHashAlgorithm) As Long
    Dim retVal As Long: retVal = 0
    retVal = Len(PS_Cmd_Beg & PS_Cmd_Mid & sHashAlgorithm & PS_Cmd_End)
    LenCmdWithoutPaths = retVal
End Function

So in the next section we start creating our variables and looping through the array of files to build the string:

Function PS_GetFileHashes(aFiles As Variant, sHashAlgorithm As String) As Scripting.Dictionary
    '...
    Dim PScmd As New PS_FileHashesCommand
    LenCmd = PScmd.LenCmdWithoutPaths(sHashAlgorithm)
    ' Need to split aFiles into manageable segments.  515 files overloaded the PS_GetOutput in this case
    ' Maybe get every 50 - 100 files at a time.
    ' Looks like 32656 characters is near the limit, 32766 is over the limit 32768 is
    aIdx = LBound(aFiles)
    Do While aIdx <= UBound(aFiles)
        sPathsPart = ""
        
        Do
            sPreviousPart = sPathsPart
            sPathsPart = sPathsPart & HALutil.WrapText(aFiles(aIdx), "'") & ","
            aIdx = aIdx + 1
            If aIdx >= UBound(aFiles) Then Exit Do
        Loop Until Len(sPathsPart) - 1 + LenCmd > 32656
        
        ' We left because either we reached the end of the array or we exceeded the limit.
        ' If we exceeded the limit then we need to back one element off of sPathsPart and the aIdx
        If Len(sPathsPart) - 1 + LenCmd > 32656 Then
            aIdx = aIdx - 1
            sPathsPart = sPreviousPart
        End If
        'Remove the comma from the end if there, if empty, use *.*
        If Len(sPathsPart) > 0 Then sPathsPart = Left(sPathsPart, Len(sPathsPart) - 1) Else sPathsPart = "'*.*'"
        ' Build the PowerShell command string
        sPSCmd = PScmd.BuildFileHashesCommand(sPathsPart, sHashAlgorithm)
        aOutput = Split(CStr(PS_GetOutput(sPSCmd)), vbCrLf)
        ' Add the output for filenames / hashes to the dictionary to return
        For Each itm In aOutput
            itmParts = Split(itm, "|")
            If UBound(itmParts) > 0 Then retVal.Add itmParts(0), itmParts(1)
        Next itm
    Loop
    '...
End Function

So the main code executing here is an outer loop over all the files, then an inner loop that builds the string until it is either done, or trips the character limit of the PS_GetOutput function.

I’m looking for something simple here. We are manipulating the index of the array during the procedure using the internal loop mainly then backing the index down 1 and using that string if it trips the length trigger. So seems like I want a loop iterator object that could keep track of the array index. I think for simplicity, I’ll start with the little loop in the middle and write a function that will do the needed work. It would need to return the correct index to start the next segment of files as well as the string. Actually, if I just pass the index as a reference, I don’t have to pass any new value back to it. This isn’t valid compiling code yet, but just cutting and pasting the code into a new function:

Private Function GetNextStringSegment(ByRef aIdx As Long, ByRef aFiles As Variant) As String
    sPathsPart = ""
    
    Do
        sPreviousPart = sPathsPart
        sPathsPart = sPathsPart & HALutil.WrapText(aFiles(aIdx), "'") & ","
        aIdx = aIdx + 1
        If aIdx >= UBound(aFiles) Then Exit Do
    Loop Until Len(sPathsPart) - 1 + LenCmd > 32656
    
    ' We left because either we reached the end of the array or we exceeded the limit.
    ' If we exceeded the limit then we need to back one element off of sPathsPart and the aIdx
    If Len(sPathsPart) - 1 + LenCmd > 32656 Then
        aIdx = aIdx - 1
        sPathsPart = sPreviousPart
    End If
    'Remove the comma from the end if there, if empty, use *.*
    If Len(sPathsPart) > 0 Then sPathsPart = Left(sPathsPart, Len(sPathsPart) - 1) Else sPathsPart = "'*.*'"
End Function

I like that I’ve just pulled out a huge chunk of the original routine and now give it a much better name according to what it’s doing. Now let’s make it a valid function and place it appropriately in the original function I’m refactoring. I normally use retVal as function return values but in this case since sPathsPart is already the main thing we want to return, I’ll just use that so I don’t have to replace or rewrite those references.

Private Function GetNextStringSegment(ByRef aIdx As Long, ByRef aFiles As Variant) As String
    Dim sPathsPart As String: sPathsPart = ""
    '...
End Function

And now it looks like I missed the LenCmd so I’ll add that into the variables, although I’m not sure I like adding a third parameter, but…

I’ll also need to Dim the sPreviousPart variable, but that should be it for variables needed and other than that, I just need to set the return value at the end.

Private Function GetNextStringSegment(ByRef aIdx As Long, ByRef aFiles As Variant, LenCmd As Long) As String
    Dim sPathsPart As String: sPathsPart = ""
    Dim sPreviousPart As String

    ' This is the loop to ensure that we get the paths concatenated until the end of the array
    ' is reached or the string maximum is exceeded
    Do
        sPreviousPart = sPathsPart
        sPathsPart = sPathsPart & HALutil.WrapText(aFiles(aIdx), "'") & ","
        aIdx = aIdx + 1
        If aIdx >= UBound(aFiles) Then Exit Do
    Loop Until Len(sPathsPart) - 1 + LenCmd > 32656
    
    ' If we exceeded the limit then we need to back one element off of sPathsPart and the aIdx
    If Len(sPathsPart) - 1 + LenCmd > 32656 Then
        aIdx = aIdx - 1
        sPathsPart = sPreviousPart
    End If
    'Remove the comma from the end if there, if empty, use *.*
    If Len(sPathsPart) > 0 Then sPathsPart = Left(sPathsPart, Len(sPathsPart) - 1) Else sPathsPart = "'*.*'"
End Function

Now our original function is much smaller and looks like this:

Function PS_GetFileHashes(aFiles As Variant, sHashAlgorithm As String) As Scripting.Dictionary
    Dim retVal As New Scripting.Dictionary
    Dim LenCmd As Long, aIdx As Long
    Dim sPSCmd As String, itm As Variant, sPathsPart As String, aOutput As Variant, itmParts As Variant
    
    On Error GoTo Error_Handler
    If AlgorithimIsUnknown(sHashAlgorithm) Then GoTo Error_Handler_Exit
    
    Dim PScmd As New PS_FileHashesCommand
    LenCmd = PScmd.LenCmdWithoutPaths(sHashAlgorithm)
    aIdx = LBound(aFiles)
    Do While aIdx <= UBound(aFiles)
        sPathsPart = GetNextStringSegment(aIdx, aFiles, LenCmd)
        ' Build the PowerShell command string
        sPSCmd = PScmd.BuildFileHashesCommand(sPathsPart, sHashAlgorithm)
        aOutput = Split(CStr(PS_GetOutput(sPSCmd)), vbCrLf)
        ' Add the output for filenames / hashes to the dictionary to return
        For Each itm In aOutput
            itmParts = Split(itm, "|")
            If UBound(itmParts) > 0 Then retVal.Add itmParts(0), itmParts(1)
        Next itm
    Loop
    Set PS_GetFileHashes = retVal

Error_Handler_Exit:
    On Error Resume Next
    Exit Function
Error_Handler:
    MsgBox "The following error has occured" & vbCrLf & vbCrLf & _
           "Error Number: " & Err.Number & vbCrLf & _
           "Error Source: PS_GetFileHash" & vbCrLf & _
           "Error Description: " & Err.Description & _
           Switch(Erl = 0, "", Erl <> 0, vbCrLf & "Line No: " & Erl) _
           , vbOKOnly + vbCritical, "An Error has Occured!"
           Resume Next
    Resume Error_Handler_Exit
End Function

I think I can finish this up tomorrow for the original function. There would be more I’d want to refactor in the helper function I just created to make it a little more intuitive, but once I’m finished refactoring the original function, I’m going to end this series.