现在的位置: 首页 > 综合 > 正文

从一个HTML返回所有的图片链接

2013年09月07日 ⁄ 综合 ⁄ 共 1783字 ⁄ 字号 评论关闭

 
uses mshtml, ActiveX, COMObj, IdHTTP, idURI;

{ .... }

procedure GetImageLinks(AURL: string; AList: TStrings);
var
   IDoc: IHTMLDocument2;
   strHTML: string;
   v: Variant;
   x: Integer;
   ovLinks: OleVariant;
   DocURL: string;
   URI: TidURI;
   ImgURL: string;
   idHTTP: TidHTTP;
begin
   AList.Clear;
   URI := TidURI.Create(AURL);
   try
     DocURL := ’http://’ + URI.Host;
     if URI.Path <> ’/’ then
       DocURL := DocURL + URI.Path;
   finally
     URI.Free;
   end;
   Idoc := CreateComObject(Class_HTMLDocument) as IHTMLDocument2;
   try
     IDoc.designMode := ’on’;
     while IDoc.readyState <> ’complete’ do
       Application.ProcessMessages;
     v       := VarArrayCreate([0, 0], VarVariant);
     idHTTP := TidHTTP.Create(nil);
     try
       strHTML := idHTTP.Get(AURL);
     finally
       idHTTP.Free;
     end;
     v[0] := strHTML;
     IDoc.Write(PSafeArray(System.TVarData(v).VArray));
     IDoc.designMode := ’off’;
     while IDoc.readyState <> ’complete’ do
       Application.ProcessMessages;
     ovLinks := IDoc.all.tags(’IMG’);
     if ovLinks.Length > 0 then
     begin
       for x := 0 to ovLinks.Length - 1 do
       begin
         ImgURL := ovLinks.Item(x).src;
         // The stuff below will probably need a little tweaking
         // Deteriming and turning realtive URLs into absolute URLs
         // is not that difficult but this is all I could come up with
         // in such a short notice.
         if (ImgURL[1] = ’/’) then
         begin
           // more than likely a relative URL so
           // append the DocURL
           ImgURL := DocURL + ImgUrl;
         end
         else
         begin
           if (Copy(ImgURL, 1, 11) = ’about:blank’) then
           begin
             ImgURL := DocURL + Copy(ImgUrl, 12, Length(ImgURL));
           end;
         end;
         AList.Add(ImgURL);
       end;
     end;
   finally
     IDoc := nil;
   end;
end;

// Beispiel:
// Example:
procedure TForm1.Button1Click(Sender: TObject);
begin
   GetImageLinks(’http://www.swissdelphicenter.ch’, Memo1.Lines);
end;

抱歉!评论已关闭.