2011年5月21日 星期六

Excel VBA 抓 ebay 和 dealoz 書目資料

   1: Option Explicit
   2: Function ExistSheet(shtName As String)
   3:     Dim Sht As Object
   4:     On Error Resume Next
   5:     Set Sht = Sheets(shtName)
   6:     If Err.Number = 0 Then
   7:         ExistSheet = True
   8:     Else
   9:         ExistSheet = False
  10:     End If
  11:     Set Sht = Nothing
  12: End Function
  13: Public Sub getURLs()
  14:     ' find last row
  15:     Dim lastCell As Range
  16:     Set lastCell = Sheets("Control Panel").Range("A65536").End(xlUp)
  17:     Dim lastRow As Integer
  18:     lastRow = lastCell.Row
  19:     
  20:     Dim hlink As Hyperlink
  21:     Dim firstRow As Integer
  22:     firstRow = 3
  23:     
  24:     For Each hlink In Sheets("Control Panel").Range(Cells(firstRow, 1), Cells(lastRow, 1)).Hyperlinks
  25:         Cells(hlink.Range.Row, 2) = hlink.Address
  26:     Next
  27: End Sub
  28: Sub getISBN()
  29:     ' in Excel,  long numeric strings will be displayed in scientific notation
  30:     ' in order to avoid scientific notation
  31:     Cells.Select
  32:     Selection.NumberFormatLocal = "0"
  33:  
  34:     ' for each row:
  35:     ' 1. get DOM objects from URL
  36:     ' 2. find ISBN-10 and ISBN-13
  37:     
  38:     ' find last row
  39:     Dim lastCell As Range
  40:     Dim lastRow As Integer
  41:     Set lastCell = Sheets("Control Panel").Range("B65536").End(xlUp)
  42:     lastRow = lastCell.Row
  43:     Dim firstRow As Integer
  44:     firstRow = 3
  45:     
  46:     ' get DOM objs
  47:     Dim ie As WebBrowser
  48:     Set ie = CreateObject("InternetExplorer.Application")
  49:     Dim doc As HTMLDocument
  50:     Dim elements As IHTMLElementCollection
  51:     Dim elem1, elem2 As IHTMLElement    ' find 2 layers of document objs
  52:     Dim strURL As String
  53:     Dim bISBN_10, bISBN_13 As Boolean
  54:     Dim i As Integer
  55:     For i = firstRow To lastRow
  56:         strURL = Cells(i, 2).Value
  57:         ie.navigate (strURL)
  58:         ie.Visible = True
  59:         Do Until ie.readyState = READYSTATE_COMPLETE
  60:             DoEvents
  61:         Loop
  62:         
  63:         Set doc = ie.document
  64:         ' only search text inside table
  65:         Set elements = doc.getElementsByTagName("TD")
  66:         
  67:         bISBN_10 = False
  68:         bISBN_13 = False
  69:         ' check first layer
  70:         For Each elem1 In elements
  71:             If bISBN_10 = False And IsNumeric(elem1.innerText) And Len(Trim(elem1.innerText)) = 10 Then
  72:                 Cells(i, 3) = elem1.innerText
  73:                 bISBN_10 = True
  74:             End If
  75:             If bISBN_13 = False And IsNumeric(elem1.innerText) And Len(Trim(elem1.innerText)) = 13 Then
  76:                 Cells(i, 4) = elem1.innerText
  77:                 bISBN_13 = True
  78:             End If
  79:             If bISBN_10 = True And bISBN_13 = True Then
  80:                 Exit For
  81:             End If
  82:         Next elem1
  83:         ' check second layer
  84:         For Each elem1 In elements
  85:             For Each elem2 In elem1.all
  86:                 If bISBN_10 = False And IsNumeric(elem2.innerText) And Len(Trim(elem2.innerText)) = 10 Then
  87:                     Cells(i, 3) = elem2.innerText
  88:                     bISBN_10 = True
  89:                 End If
  90:                 If bISBN_13 = False And IsNumeric(elem2.innerText) And Len(Trim(elem2.innerText)) = 13 Then
  91:                     Cells(i, 4) = elem2.innerText
  92:                     bISBN_13 = True
  93:                 End If
  94:                 If bISBN_10 = True And bISBN_13 = True Then
  95:                     Exit For
  96:                 End If
  97:             Next elem2
  98:             If bISBN_10 = True And bISBN_13 = True Then
  99:                     Exit For
 100:             End If
 101:         Next elem1
 102:         ' if both ISBN-10 and ISBN-13 are not found, ignore the rest steps
 103:         If bISBN_10 = False And bISBN_13 = False Then
 104:             Cells(i, 3) = "Not Found"
 105:             Cells(i, 4) = "Not Found"
 106:         Else
 107:         End If
 108:     Next i
 109:     Columns(3).AutoFit
 110:     Columns(4).AutoFit
 111: End Sub
 112: Sub getBooksInfo()
 113:     ' for each row:
 114:     ' 1. get DOM objects from URL
 115:     ' 2. find title,
 116:     
 117:     ' find last row
 118:     Dim lastCell As Range
 119:     Dim lastRow As Integer
 120:     Set lastCell = Sheets("Control Panel").Range("B65536").End(xlUp)
 121:     lastRow = lastCell.Row
 122:     Dim firstRow As Integer
 123:     firstRow = 3
 124:     
 125:     Dim i As Integer
 126:     For i = firstRow To lastRow
 127:         If IsNumeric(Cells(i, 4)) Then
 128:             ' create a new "Temp" worksheet
 129:             If ExistSheet("Temp") Then
 130:                 Application.DisplayAlerts = False
 131:                 Sheets("Temp").Delete
 132:                 Application.DisplayAlerts = True
 133:             End If
 134:             Sheets.Add After:=Sheets(Sheets.Count)
 135:             ActiveSheet.Name = "Temp"
 136:             
 137:             Dim strISBN, strURL1, strURL2, strURL3, strURL4, strURL5, strURL6 As String
 138:             strISBN = Sheets("Control Panel").Cells(i, 4)
 139:             strURL1 = "URL;http://www.dealoz.com/prod2.pl?cat=book&op=buy&op2=buy&lang=en-us&search_country=us&shipto=us&cur=usd&zip=&nw=y&class=&pqcs=&quantity=&shipping_type=&sort=&catby=book.keyword&query="
 140:             strURL2 = "&asin=&ean="
 141:             strURL3 = "&upc=&mpn=&mfr="
 142:             strURL4 = "prod2.pl?cat=book&op=buy&op2=buy&lang=en-us&search_country=us&shipto=us&cur=usd&zip=&nw=y&class=&pqcs=&quantity=&shipping_type=&sort=&catby=book.keyword&query="
 143:             strURL5 = "&asin=&ean="
 144:             strURL6 = "&upc=&mpn=&mfr="
 145:             Sheets("Temp").Select
 146:             With ActiveSheet.QueryTables.Add(Connection:= _
 147:                 strURL1 & strISBN & strURL2 & strISBN & strURL3 _
 148:                 , Destination:=Range("A1"))
 149:                 .Name = _
 150:                 strURL4 & strISBN & strURL5 & strISBN & strURL6
 151:                 .FieldNames = True
 152:                 .RowNumbers = False
 153:                 .FillAdjacentFormulas = False
 154:                 .PreserveFormatting = True
 155:                 .RefreshOnFileOpen = False
 156:                 .BackgroundQuery = True
 157:                 .RefreshStyle = xlInsertDeleteCells
 158:                 .SavePassword = False
 159:                 .SaveData = True
 160:                 .AdjustColumnWidth = True
 161:                 .RefreshPeriod = 0
 162:                 .WebSelectionType = xlSpecifiedTables
 163:                 .WebFormatting = xlWebFormattingNone
 164:                 .WebTables = "10"
 165:                 .WebPreFormattedTextToColumns = True
 166:                 .WebConsecutiveDelimitersAsOne = True
 167:                 .WebSingleBlockTextImport = False
 168:                 .WebDisableDateRecognition = False
 169:                 .WebDisableRedirections = False
 170:                 .Refresh BackgroundQuery:=False
 171:             End With
 172:             Sheets("Control Panel").Select
 173:             Dim str As String
 174:             ' get Title
 175:             Sheets("Control Panel").Cells(i, 5) = Sheets("Temp").Cells(1, 1)
 176:             ' get Author
 177:             Sheets("Control Panel").Cells(i, 6) = Trim(Replace(Sheets("Temp").Cells(2, 1), "Author:", ""))
 178:             ' split data of edition, publisher, format
 179:             Dim varSplit As Variant
 180:             Dim j As Integer
 181:             varSplit = Split(Sheets("Temp").Cells(3, 1), " ")
 182:             ' get Edition
 183:             For j = LBound(varSplit) To UBound(varSplit)
 184:                 If InStr(varSplit(j), "Edition") Then
 185:                     Sheets("Control Panel").Cells(i, 7) = varSplit(j + 1)
 186:                 End If
 187:                 'MsgBox "Item " & intIndex & " is " & avarSplit(intIndex) & _
 188:                 '" which is " & Len(avarSplit(intIndex)) & " characters long", vbInformation
 189:             Next
 190:             ' get Publisher
 191:             Dim iStart, iEnd As Integer
 192:             iStart = 0
 193:             iEnd = 0
 194:             For j = LBound(varSplit) To UBound(varSplit)
 195:                 ' find the start and end string of publisher
 196:                 If InStr(varSplit(j), "Publisher") Then
 197:                     iStart = j + 1
 198:                 End If
 199:                 If InStr(varSplit(j), "Hard") Or InStr(varSplit(j), "Paper") Then
 200:                     iEnd = j - 1
 201:                 End If
 202:             Next j
 203:             ' if iStart=0, no publisher info
 204:             If iStart = 0 Then
 205:                 Exit For
 206:             End If
 207:             ' if iEnd=0, join the rest strings as publisher
 208:             If iEnd = 0 Then
 209:                 iEnd = UBound(varSplit)
 210:             End If
 211:             Dim strArr(256) As String
 212:             Dim k As Integer
 213:             For k = iStart To iEnd
 214:                 strArr(k - iStart + 1) = varSplit(k)
 215:             Next k
 216:             Sheets("Control Panel").Cells(i, 8) = Trim(Join(strArr))
 217:             ' get format
 218:             For j = LBound(varSplit) To UBound(varSplit)
 219:                 If InStr(varSplit(j), "Hard") Then
 220:                     Sheets("Control Panel").Cells(i, 9) = Replace(varSplit(j), ":", "")
 221:                     Exit For
 222:                 End If
 223:                 If InStr(varSplit(j), "Paper") Then
 224:                     Sheets("Control Panel").Cells(i, 9) = Replace(varSplit(j), ":", "")
 225:                     Exit For
 226:                 End If
 227:             Next
 228:         End If ' if ISBN isNumeric
 229:     Next i
 230:     Sheets("Control Panel").Select
 231:     Columns("E:I").Select
 232:     Selection.Columns.AutoFit
 233: End Sub