Jsoup 解析 网站电视剧信息问题????

黛曦葛溪 发布于 2012/08/30 17:45
阅读 877
收藏 0

我现在 解析hao123 网站的所有视频信息 ,到解析 电视剧的时候遇到问题了,现在 已经得到 <script> 里面的信息了,我想得到  里面的  如下信息 :

 

我想得到的结果是:

奇艺:http://www.iqiyi.com/dianshiju/ssdpg.html 这样的  格式
window.tv.prov["11197"] = {
"qiyi":{"n":"奇艺","u":"http:\/\/www.iqiyi.com\/dianshiju\/ssdpg.html"},

"youku":{"n":"优酷","u":"http:\/\/www.youku.com\/show_page\/id_z7798af6a893211e1b356.html"},

"letv":{"n":"乐视","u":"http:\/\/www.letv.com\/ptv\/pplay\/77505.html"},

"qq":{"n":"腾讯","u":"http:\/\/v.qq.com\/detail\/h\/h0029i4nvzorean.html"},

"sohu":{"n":"搜狐","u":"http:\/\/tv.sohu.com\/s2012\/ssdpg\/"}

};
那么 我怎么才能得到 里面的 各个链接地址呢,我现在需要得到这些链接地址,然后进入解析 每一家网站的  每一集 的链接地址,
还有就是,因为每一个网站的 链接 放的 标签  定义的 属性什么的也不同,所以 还要判断一下 我打开的网站是哪家的 然后 用对应的 解析方法:
比如:
if(优酷家的网站){
解析优酷加的方法
}
这样就要写很多 解析方法,有没有 一个更好地解决方法呢????
加载中
0
黛曦葛溪
黛曦葛溪

我用下面这个方法  取出来了  如下信息,

"sina":{"n":"新浪","u":"http:\/\/video.sina.com.cn\/movie\/detail\/wstzb"},

"youku":{"n":"优酷","u":"http:\/\/www.youku.com\/show_page\/id_z0cd7e0a0e0d311df97c0.html"},

"letv":{"n":"乐视","u":"http:\/\/www.letv.com\/ptv\/pplay\/36143.html"},

"qiyi":{"n":"奇艺","u":"http:\/\/www.iqiyi.com\/dianshiju\/wstzb.html"},

"sohu":{"n":"搜狐","u":"http:\/\/tv.sohu.com\/s2011\/wstzb\/"}}

;window.tvData || (window.tvData = {});window.tvData["1872"] = {"id":"1872", "title":"我是特种兵", "playurl":"http:\/\/video.sina.com.cn\/movie\/detail\/wstzb"}


但是我怎么得到  里面的  新浪,优酷 等字段呢?    还有对应的  http 地址,
1872  这是 window.tv.prov["11197"] []里面的值,每个电影信息里面的之都不同,所以可以使用josn    window.tv.prov["11197"].qiyi.u   这样解析,但是 有的电视链接  网站的多少不同,这个写死了还是不行。。。。我该怎么做呢????



 Elements href =element.select("script");
					String str =href.toString();
					 String p0 = "window\\.tv\\.prov\\[\"(\\d*)\"\\]";
				        Pattern pattern0 = Pattern.compile(p0, Pattern.MULTILINE);
				        Matcher m0 = pattern0.matcher(str);
				        if(m0.find()) {
				            System.out.println(m0.group(1));
				        }
				        String p = "\"\\w*\":\\{\"n\":\".*\",\"u\":\"(.*)\"\\}";
				       
				        Pattern pattern = Pattern.compile(p, Pattern.MULTILINE);
				        Matcher m = pattern.matcher(str);
				       
				        //int group = 1;
				        while(m.find()) {
				            System.out.println(m.group());
				           // m.start(group);
				        }

0
黛曦葛溪
黛曦葛溪

html 里面<script>的全部代码 如下:

<script>
	window.tv || (window.tv = {});
	window.tv.eps = window.tv.eps || {};
	window.tv.eps["11197"] = {

				"1":"http:\/\/www.iqiyi.com\/dianshiju\/20120601\/b501d0a494d0a7fd.html",
				"2":"http:\/\/www.iqiyi.com\/dianshiju\/20120601\/956ea8bc0e1c6e91.html",
				"3":"http:\/\/www.iqiyi.com\/dianshiju\/20120603\/a4f397f6fb81e25f.html",
				"4":"http:\/\/www.iqiyi.com\/dianshiju\/20120604\/1aa5c4bce102aae1.html",
				"5":"http:\/\/www.iqiyi.com\/dianshiju\/20120610\/e83c2279ba5bb290.html",
				"6":"http:\/\/www.iqiyi.com\/dianshiju\/20120611\/d8886e2da56533bc.html",
				"7":"http:\/\/www.iqiyi.com\/dianshiju\/20120617\/3ab9df7c56085e7f.html",
				"8":"http:\/\/www.iqiyi.com\/dianshiju\/20120618\/e5212fb3821f9203.html",
				"9":"http:\/\/www.iqiyi.com\/dianshiju\/20120624\/ae1ddbfff4dd4b61.html",
				"10":"http:\/\/www.iqiyi.com\/dianshiju\/20120625\/3688da2a73ec695b.html",
				"11":"http:\/\/www.iqiyi.com\/dianshiju\/20120701\/d433d947de77a1ec.html",
				"12":"http:\/\/www.iqiyi.com\/dianshiju\/20120702\/bac80f73b4584391.html",
				"13":"http:\/\/www.iqiyi.com\/dianshiju\/20120708\/429aa9ea0040decf.html",
				"14":"http:\/\/www.iqiyi.com\/dianshiju\/20120709\/c4d6436400a268de.html",
				"15":"http:\/\/www.iqiyi.com\/dianshiju\/20120715\/aca5880831c37906.html",
				"16":"http:\/\/www.iqiyi.com\/dianshiju\/20120716\/a72181a1276479e1.html",
				"17":"http:\/\/www.iqiyi.com\/dianshiju\/20120722\/bc92f17a4a99601b.html",
				"18":"http:\/\/www.iqiyi.com\/dianshiju\/20120723\/e9316c9d4cc5de5a.html",
				"19":"http:\/\/www.iqiyi.com\/dianshiju\/20120812\/61797bff0b4b901a.html",
				"20":"http:\/\/www.iqiyi.com\/dianshiju\/20120813\/c0588babddfa7a93.html"

				};

          window.tv.prov = window.tv.prov || {};
	 window.tv.prov["11197"] = {
		 
		"qiyi":{"n":"奇艺","u":"http:\/\/www.iqiyi.com\/dianshiju\/ssdpg.html"},

		"youku":{"n":"优酷","u":"http:\/\/www.youku.com\/show_page\/id_z7798af6a893211e1b356.html"},

		"letv":{"n":"乐视","u":"http:\/\/www.letv.com\/ptv\/pplay\/77505.html"},

		"qq":{"n":"腾讯","u":"http:\/\/v.qq.com\/detail\/h\/h0029i4nvzorean.html"},

		"sohu":{"n":"搜狐","u":"http:\/\/tv.sohu.com\/s2012\/ssdpg\/"}
             
		};

      window.tvData || (window.tvData = {});

	window.tvData["11197"] = {"id":"11197", "title":"绅士的品格", "playurl":"http:\/

\/www.iqiyi.com\/dianshiju\/ssdpg.html"};

</script>

返回顶部
顶部