1: Option Explicit
2: Function ExistSheet(shtName As String)
3: Dim Sht As Object
4: On Error Resume Next
5: Set Sht = Sheets(shtName)
6: If Err.Number = 0 Then
7: ExistSheet = True
8: Else
9: ExistSheet = False
10: End If
11: Set Sht = Nothing
12: End Function
13: Public Sub getURLs()
14: ' find last row
15: Dim lastCell As Range
16: Set lastCell = Sheets("Control Panel").Range("A65536").End(xlUp)
17: Dim lastRow As Integer
18: lastRow = lastCell.Row
19:
20: Dim hlink As Hyperlink
21: Dim firstRow As Integer
22: firstRow = 3
23:
24: For Each hlink In Sheets("Control Panel").Range(Cells(firstRow, 1), Cells(lastRow, 1)).Hyperlinks
25: Cells(hlink.Range.Row, 2) = hlink.Address
26: Next
27: End Sub
28: Sub getISBN()
29: ' in Excel, long numeric strings will be displayed in scientific notation
30: ' in order to avoid scientific notation
31: Cells.Select
32: Selection.NumberFormatLocal = "0"
33:
34: ' for each row:
35: ' 1. get DOM objects from URL
36: ' 2. find ISBN-10 and ISBN-13
37:
38: ' find last row
39: Dim lastCell As Range
40: Dim lastRow As Integer
41: Set lastCell = Sheets("Control Panel").Range("B65536").End(xlUp)
42: lastRow = lastCell.Row
43: Dim firstRow As Integer
44: firstRow = 3
45:
46: ' get DOM objs
47: Dim ie As WebBrowser
48: Set ie = CreateObject("InternetExplorer.Application")
49: Dim doc As HTMLDocument
50: Dim elements As IHTMLElementCollection
51: Dim elem1, elem2 As IHTMLElement ' find 2 layers of document objs
52: Dim strURL As String
53: Dim bISBN_10, bISBN_13 As Boolean
54: Dim i As Integer
55: For i = firstRow To lastRow
56: strURL = Cells(i, 2).Value
57: ie.navigate (strURL)
58: ie.Visible = True
59: Do Until ie.readyState = READYSTATE_COMPLETE
60: DoEvents
61: Loop
62:
63: Set doc = ie.document
64: ' only search text inside table
65: Set elements = doc.getElementsByTagName("TD")
66:
67: bISBN_10 = False
68: bISBN_13 = False
69: ' check first layer
70: For Each elem1 In elements
71: If bISBN_10 = False And IsNumeric(elem1.innerText) And Len(Trim(elem1.innerText)) = 10 Then
72: Cells(i, 3) = elem1.innerText
73: bISBN_10 = True
74: End If
75: If bISBN_13 = False And IsNumeric(elem1.innerText) And Len(Trim(elem1.innerText)) = 13 Then
76: Cells(i, 4) = elem1.innerText
77: bISBN_13 = True
78: End If
79: If bISBN_10 = True And bISBN_13 = True Then
80: Exit For
81: End If
82: Next elem1
83: ' check second layer
84: For Each elem1 In elements
85: For Each elem2 In elem1.all
86: If bISBN_10 = False And IsNumeric(elem2.innerText) And Len(Trim(elem2.innerText)) = 10 Then
87: Cells(i, 3) = elem2.innerText
88: bISBN_10 = True
89: End If
90: If bISBN_13 = False And IsNumeric(elem2.innerText) And Len(Trim(elem2.innerText)) = 13 Then
91: Cells(i, 4) = elem2.innerText
92: bISBN_13 = True
93: End If
94: If bISBN_10 = True And bISBN_13 = True Then
95: Exit For
96: End If
97: Next elem2
98: If bISBN_10 = True And bISBN_13 = True Then
99: Exit For
100: End If
101: Next elem1
102: ' if both ISBN-10 and ISBN-13 are not found, ignore the rest steps
103: If bISBN_10 = False And bISBN_13 = False Then
104: Cells(i, 3) = "Not Found"
105: Cells(i, 4) = "Not Found"
106: Else
107: End If
108: Next i
109: Columns(3).AutoFit
110: Columns(4).AutoFit
111: End Sub
112: Sub getBooksInfo()
113: ' for each row:
114: ' 1. get DOM objects from URL
115: ' 2. find title,
116:
117: ' find last row
118: Dim lastCell As Range
119: Dim lastRow As Integer
120: Set lastCell = Sheets("Control Panel").Range("B65536").End(xlUp)
121: lastRow = lastCell.Row
122: Dim firstRow As Integer
123: firstRow = 3
124:
125: Dim i As Integer
126: For i = firstRow To lastRow
127: If IsNumeric(Cells(i, 4)) Then
128: ' create a new "Temp" worksheet
129: If ExistSheet("Temp") Then
130: Application.DisplayAlerts = False
131: Sheets("Temp").Delete
132: Application.DisplayAlerts = True
133: End If
134: Sheets.Add After:=Sheets(Sheets.Count)
135: ActiveSheet.Name = "Temp"
136:
137: Dim strISBN, strURL1, strURL2, strURL3, strURL4, strURL5, strURL6 As String
138: strISBN = Sheets("Control Panel").Cells(i, 4)
139: strURL1 = "URL;http://www.dealoz.com/prod2.pl?cat=book&op=buy&op2=buy&lang=en-us&search_country=us&shipto=us&cur=usd&zip=&nw=y&class=&pqcs=&quantity=&shipping_type=&sort=&catby=book.keyword&query="
140: strURL2 = "&asin=&ean="
141: strURL3 = "&upc=&mpn=&mfr="
142: strURL4 = "prod2.pl?cat=book&op=buy&op2=buy&lang=en-us&search_country=us&shipto=us&cur=usd&zip=&nw=y&class=&pqcs=&quantity=&shipping_type=&sort=&catby=book.keyword&query="
143: strURL5 = "&asin=&ean="
144: strURL6 = "&upc=&mpn=&mfr="
145: Sheets("Temp").Select
146: With ActiveSheet.QueryTables.Add(Connection:= _
147: strURL1 & strISBN & strURL2 & strISBN & strURL3 _
148: , Destination:=Range("A1"))
149: .Name = _
150: strURL4 & strISBN & strURL5 & strISBN & strURL6
151: .FieldNames = True
152: .RowNumbers = False
153: .FillAdjacentFormulas = False
154: .PreserveFormatting = True
155: .RefreshOnFileOpen = False
156: .BackgroundQuery = True
157: .RefreshStyle = xlInsertDeleteCells
158: .SavePassword = False
159: .SaveData = True
160: .AdjustColumnWidth = True
161: .RefreshPeriod = 0
162: .WebSelectionType = xlSpecifiedTables
163: .WebFormatting = xlWebFormattingNone
164: .WebTables = "10"
165: .WebPreFormattedTextToColumns = True
166: .WebConsecutiveDelimitersAsOne = True
167: .WebSingleBlockTextImport = False
168: .WebDisableDateRecognition = False
169: .WebDisableRedirections = False
170: .Refresh BackgroundQuery:=False
171: End With
172: Sheets("Control Panel").Select
173: Dim str As String
174: ' get Title
175: Sheets("Control Panel").Cells(i, 5) = Sheets("Temp").Cells(1, 1)
176: ' get Author
177: Sheets("Control Panel").Cells(i, 6) = Trim(Replace(Sheets("Temp").Cells(2, 1), "Author:", ""))
178: ' split data of edition, publisher, format
179: Dim varSplit As Variant
180: Dim j As Integer
181: varSplit = Split(Sheets("Temp").Cells(3, 1), " ")
182: ' get Edition
183: For j = LBound(varSplit) To UBound(varSplit)
184: If InStr(varSplit(j), "Edition") Then
185: Sheets("Control Panel").Cells(i, 7) = varSplit(j + 1)
186: End If
187: 'MsgBox "Item " & intIndex & " is " & avarSplit(intIndex) & _
188: '" which is " & Len(avarSplit(intIndex)) & " characters long", vbInformation
189: Next
190: ' get Publisher
191: Dim iStart, iEnd As Integer
192: iStart = 0
193: iEnd = 0
194: For j = LBound(varSplit) To UBound(varSplit)
195: ' find the start and end string of publisher
196: If InStr(varSplit(j), "Publisher") Then
197: iStart = j + 1
198: End If
199: If InStr(varSplit(j), "Hard") Or InStr(varSplit(j), "Paper") Then
200: iEnd = j - 1
201: End If
202: Next j
203: ' if iStart=0, no publisher info
204: If iStart = 0 Then
205: Exit For
206: End If
207: ' if iEnd=0, join the rest strings as publisher
208: If iEnd = 0 Then
209: iEnd = UBound(varSplit)
210: End If
211: Dim strArr(256) As String
212: Dim k As Integer
213: For k = iStart To iEnd
214: strArr(k - iStart + 1) = varSplit(k)
215: Next k
216: Sheets("Control Panel").Cells(i, 8) = Trim(Join(strArr))
217: ' get format
218: For j = LBound(varSplit) To UBound(varSplit)
219: If InStr(varSplit(j), "Hard") Then
220: Sheets("Control Panel").Cells(i, 9) = Replace(varSplit(j), ":", "")
221: Exit For
222: End If
223: If InStr(varSplit(j), "Paper") Then
224: Sheets("Control Panel").Cells(i, 9) = Replace(varSplit(j), ":", "")
225: Exit For
226: End If
227: Next
228: End If ' if ISBN isNumeric
229: Next i
230: Sheets("Control Panel").Select
231: Columns("E:I").Select
232: Selection.Columns.AutoFit
233: End Sub
沒有留言:
張貼留言