package com.thunisoft.kms.java.lvl2.exam;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.net.URLConnection;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.HTMLEditorKit.ParserCallback;
import javax.swing.text.html.parser.ParserDelegator;

/**
 * Downloads a weather forecast page, stores it on disk and prints the text
 * found inside the forecast container of that page.
 *
 * <p>The class is a Swing HTML parser callback. While the parser walks the
 * document the callback tracks whether it is inside the
 * <code>&lt;div id="7d"&gt;</code> container, and collects the text of every
 * table cell and of every id-less link ending in <code>.php</code> found
 * there. The collected strings are kept in the static {@link #element} list,
 * which is shared by all instances and is not cleared between parses.
 *
 * @author keep at it
 * @version 1.0
 * @date 2013-12-4
 */
public class GrapWeatherInfo extends ParserCallback
{
    /** True while inside a table whose class attribute is "yuBaoTable". */
    protected boolean isTable = false;
    /** True while inside a link whose text should be captured. */
    protected boolean isAlink = false;
    /** True while inside the div whose id attribute is "7d". */
    protected boolean isDiv = false;
    /** True while inside a table cell of the target div. */
    protected boolean isTd = false;
    /** Captured text fragments, in document order; ";" marks the end of a table. */
    protected static Vector<String> element = new Vector<String>();
    /** Never written by this class; exposed through {@link #getParagraphText()}. */
    protected static String paragraphText = "";
    /**
     * URL of the page to download.
     *
     * NOTE(review): the literal was missing from the source this class was
     * recovered from, so the default is empty. Put the forecast page URL here
     * or pass it as the first program argument.
     */
    private static final String FILE_URL = "";
    /** Local path the downloaded page is written to before it is parsed. */
    private static final String FILE_LOCATION = "E:/url.html";
    /** Removes every run of whitespace; compiled once instead of per text node. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");
    /**
     * Substring that identifies a weekday label. Written with Unicode escapes
     * so the value does not depend on the encoding used to compile the file.
     */
    private static final String WEEKDAY_MARKER = "\u661f\u671f";
    /** Suffix that identifies a night-time entry (Unicode-escaped, see above). */
    private static final String NIGHT_MARKER = "\u591c\u95f4";
    /**
     * Text printed after every value.
     *
     * NOTE(review): the source literal arrived as two question marks and a
     * space, the same corruption that replaced the file's indentation, so it
     * is restored here as plain spaces. Confirm against the original.
     */
    private static final String SEPARATOR = "   ";

    /** Creates a callback with all state flags cleared. */
    public GrapWeatherInfo()
    {
    }

    /**
     * Parses the HTML supplied by the reader with a fresh callback and prints
     * the captured fragments to standard output.
     *
     * <p>Printing starts at index 1 because each entry is compared with its
     * predecessor; the fragment at index 0 is therefore never printed.
     *
     * @param r source of the HTML document
     */
    private static void startParse(Reader r)
    {
        try
        {
            HTMLEditorKit.ParserCallback callback = new GrapWeatherInfo();
            new ParserDelegator().parse(r, callback, true);
        }
        catch (IOException e)
        {
            e.printStackTrace();
            return;
        }

        Vector<String> items = element;
        // Most recent weekday label; repeated in front of a night-time entry
        // that is not directly preceded by a weekday label.
        String weekday = "";
        for (int i = 1; i < items.size(); i++)
        {
            String current = items.get(i);
            if (current.contains(WEEKDAY_MARKER))
            {
                weekday = current;
            }

            if (current.equals(";"))
            {
                // End-of-table marker: finish the current output line.
                System.out.println();
            }
            else if (!current.equals(">"))
            {
                boolean nightWithoutLabel = current.endsWith(NIGHT_MARKER)
                        && !items.get(i - 1).contains(WEEKDAY_MARKER);
                if (nightWithoutLabel)
                {
                    System.out.println();
                    System.out.print(weekday + SEPARATOR);
                }
                System.out.print(current + SEPARATOR);
            }
        }
    }

    /**
     * Stores the text of the current node, with all whitespace removed, when
     * the parser is inside a captured link or a captured table cell.
     *
     * @param data characters of the text node
     * @param pos position of the text in the document (unused)
     */
    @Override
    public void handleText(char[] data, int pos)
    {
        if (!isAlink && !isTd)
        {
            return;
        }
        // paragraphText is never reassigned in this class, so this guard only
        // matters if a subclass sets the field to null.
        if (paragraphText != null)
        {
            element.addElement(WHITESPACE.matcher(new String(data)).replaceAll(""));
        }
    }

    /**
     * Updates the state flags for an opening tag.
     *
     * @param t tag being opened
     * @param a attributes of the tag
     * @param pos position of the tag in the document (unused)
     */
    @Override
    public void handleStartTag(HTML.Tag t,
                               MutableAttributeSet a, int pos)
    {
        if (t == HTML.Tag.DIV
                && "7d".equals(a.getAttribute(HTML.Attribute.ID)))
        {
            isDiv = true;
        }

        if (t == HTML.Tag.TABLE
                && "yuBaoTable".equals(a.getAttribute(HTML.Attribute.CLASS)))
        {
            isTable = true;
        }

        // Only id-less links inside the target div whose href ends in ".php"
        // are captured.
        if (t == HTML.Tag.A && isDiv
                && a.getAttribute(HTML.Attribute.ID) == null)
        {
            Object href = a.getAttribute(HTML.Attribute.HREF);
            if (href != null && href.toString().endsWith(".php"))
            {
                isAlink = true;
            }
        }

        if (t == HTML.Tag.TD && isDiv)
        {
            isTd = true;
        }
    }

    /**
     * Called by the parser on a parse error; errors are ignored.
     *
     * @param errorMsg description of the problem
     * @param pos position of the problem in the document
     */
    @Override
    public void handleError(String errorMsg, int pos)
    {
    }

    /**
     * Treats a simple (empty) tag exactly like an opening tag.
     *
     * @param t tag encountered
     * @param a attributes of the tag
     * @param pos position of the tag in the document
     */
    @Override
    public void handleSimpleTag(HTML.Tag t,
                                MutableAttributeSet a, int pos)
    {
        handleStartTag(t, a, pos);
    }

    /**
     * Returns the shared paragraph text field.
     *
     * @return current value of {@link #paragraphText}
     */
    public static String getParagraphText()
    {
        return paragraphText;
    }

    /**
     * Called by the parser for an HTML comment; comments are ignored.
     *
     * @param data characters of the comment
     * @param pos position of the comment in the document
     */
    @Override
    public void handleComment(char[] data, int pos)
    {
    }

    /**
     * Updates the state flags for a closing tag. Every closing table tag seen
     * outside a captured link also appends a ";" marker to {@link #element}.
     *
     * @param t tag being closed
     * @param pos position of the tag in the document (unused)
     */
    @Override
    public void handleEndTag(HTML.Tag t, int pos)
    {
        if (t == HTML.Tag.A)
        {
            isAlink = false;
        }
        else if (t == HTML.Tag.TABLE && !isAlink)
        {
            isTable = false;
            element.addElement(";");
        }
        else if (t == HTML.Tag.DIV && !isTable)
        {
            // A closing div inside the forecast table does not end the capture.
            isDiv = false;
        }
        else if (t == HTML.Tag.TD)
        {
            isTd = false;
        }
    }

    /**
     * Program entry point: downloads the page, saves it to
     * {@link #FILE_LOCATION}, then parses the saved copy and prints the result.
     *
     * @param args optional; when present, the first argument is used as the
     *             page URL instead of {@link #FILE_URL}
     */
    public static void main(String args[])
    {
        String pageUrl = (args != null && args.length > 0) ? args[0] : FILE_URL;
        if (pageUrl == null || pageUrl.length() == 0)
        {
            System.err.println("No forecast URL configured: set FILE_URL "
                    + "or pass the URL as the first argument.");
            return;
        }

        try
        {
            download(pageUrl, FILE_LOCATION);
            parseFile(FILE_LOCATION);
        }
        catch (Exception e)
        {
            // Top-level boundary: report the failure and exit normally.
            e.printStackTrace();
        }
    }

    /** Copies the content served at sourceUrl into the file at targetPath. */
    private static void download(String sourceUrl, String targetPath)
            throws IOException
    {
        InputStream input = null;
        FileOutputStream output = null;
        try
        {
            URLConnection conn = new URL(sourceUrl).openConnection();
            conn.connect();
            input = conn.getInputStream();
            output = new FileOutputStream(targetPath);

            byte[] buffer = new byte[1024];
            int read;
            while ((read = input.read(buffer)) != -1)
            {
                output.write(buffer, 0, read);
            }
            // Close on the success path so a failure to finish the file is
            // reported before the file is read back.
            output.close();
        }
        finally
        {
            closeQuietly(output);
            closeQuietly(input);
        }
    }

    /** Opens the file at path as UTF-8 text and runs the forecast parser on it. */
    private static void parseFile(String path) throws IOException
    {
        BufferedReader reader = null;
        try
        {
            reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(path), "UTF-8"));
            startParse(reader);
        }
        finally
        {
            closeQuietly(reader);
        }
    }

    /** Closes the resource if it is non-null, ignoring any failure to close. */
    private static void closeQuietly(Closeable resource)
    {
        if (resource == null)
        {
            return;
        }
        try
        {
            resource.close();
        }
        catch (IOException ignored)
        {
            // Best-effort cleanup; nothing useful can be done here.
        }
    }
}