1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
| import com.gargoylesoftware.htmlunit.WebClient; import com.gargoylesoftware.htmlunit.html.HtmlPage; import org.apache.commons.lang3.StringUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import org.jsoup.nodes.Document;
/**
 * Command-line scraper that walks a paginated Jianshu user article list and
 * prints every collected entry as {@code title = timestamp}.
 *
 * <p>Pages are rendered with HtmlUnit, and the resulting markup is parsed with jsoup.
 */
public class jsTest {

    /**
     * How many times each title has been encountered after it was already stored,
     * kept as a decimal string. The first repeat stores {@code "2"}; reaching
     * {@code 3} stops the crawl.
     */
    static Map<String,String> countMap = new HashMap<>();

    /**
     * Crawls the hard-coded user listing and prints the collected entries.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Map<String, String> urlMap = new HashMap<>(255);
        String baseUrl = "https://www.jianshu.com/u/2eb26c0a6d3b?order_by=shared_at&page=";
        getTime(baseUrl, urlMap);
        urlMap.forEach((k, v) -> System.out.println(k + " = " + v));
    }

    /**
     * Requests {@code baseUrl + pageIndex} for pageIndex = 0, 1, 2, ... until
     * {@link #getJianShuArticleUrlList(String, Map)} reports that paging should stop.
     *
     * @param baseUrl listing URL ending right before the page number
     * @param urlMap  receives title -> timestamp entries
     */
    private static void getTime(String baseUrl, Map<String, String> urlMap) {
        int pageIndex = 0;
        String currentUrl;
        do {
            currentUrl = baseUrl + pageIndex;
            pageIndex++;
            System.out.println("currentUrl = " + currentUrl);
        } while (getJianShuArticleUrlList(currentUrl, urlMap));
    }

    /**
     * Loads one listing page and records each article's title and timestamp.
     *
     * @param oneUrl URL of the listing page to fetch
     * @param urlMap receives title -> timestamp entries; existing titles are overwritten
     * @return {@code true} if the caller should go on to the next page; {@code false}
     *         if the page could not be loaded, contained no list entries, contained an
     *         entry without a title or time element, a title was seen again for the
     *         second time, or any exception occurred (its stack trace is printed)
     */
    public static boolean getJianShuArticleUrlList(String oneUrl, Map<String,String> urlMap) {
        // try-with-resources: the client is closed on every exit path, including
        // the early returns below.
        try (WebClient webClient = new WebClient()) {
            webClient.getOptions().setThrowExceptionOnScriptError(false);
            webClient.getOptions().setCssEnabled(false);
            webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);

            HtmlPage page = webClient.getPage(oneUrl);
            if (page == null || !page.isHtmlPage()) {
                return false;
            }

            Document doc = Jsoup.parse(page.asXml());
            Elements entries = doc.select("[class=note-list] li");
            if (entries.isEmpty()) {
                // Nothing to read on this page. Reporting "continue" here would make
                // the paging loop request further pages without ever terminating.
                return false;
            }

            for (Element entry : entries) {
                if (!recordEntry(entry, urlMap)) {
                    return false;
                }
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Extracts title and timestamp from a single list entry and stores them.
     *
     * @return {@code false} when paging must stop (required element missing, or the
     *         title's repeat counter reached 3); {@code true} otherwise
     */
    private static boolean recordEntry(Element entry, Map<String, String> urlMap) {
        Element titleElement = entry.select("[class=title]").first();
        Element timeElement = entry.select("[class=time]").first();
        if (timeElement == null) {
            timeElement = entry.select("[data-type=share_note]").first();
        }
        if (timeElement == null || titleElement == null) {
            return false;
        }

        String title = titleElement.text();
        // Prefer data-shared-at; fall back to data-datetime when it is blank.
        String timestamp = timeElement.attr("data-shared-at");
        if (StringUtils.isBlank(timestamp)) {
            timestamp = timeElement.attr("data-datetime");
        }
        if (StringUtils.isBlank(title) || StringUtils.isBlank(timestamp)) {
            // Incomplete entry: skip it but keep processing the page.
            return true;
        }

        if (StringUtils.isNotBlank(urlMap.get(title))) {
            // Title already stored. NOTE(review): presumably the site serves
            // already-seen entries once paging runs past the end, and this counter
            // is the end-of-list detector - confirm.
            String previous = countMap.get(title);
            int count = (previous == null ? 1 : Integer.parseInt(previous)) + 1;
            countMap.put(title, String.valueOf(count));
            if (count == 3) {
                return false;
            }
        }

        // Turn e.g. "2020-01-02T03:04:05+08:00" into "2020-01-02 03:04:05".
        urlMap.put(title, timestamp.replace("T", " ").replace("+08:00", ""));
        return true;
    }
}
|