import java.net.*; import java.io.*; import java.lang.*; import java.util.*; /*download new car information from www.edmunds.com and store them into xml files */ public class carsXML { //to contain list of urls pointing to local xml files static PrintWriter xmlurls; static String EDMUNDS_NEW="http://www.edmunds.com/vehicles/"; public static void main(String[] args) { try { xmlurls=new PrintWriter(new FileWriter("cars.url")); } catch (IOException ie) { ie.printStackTrace(); } parse_new_cars(); //parse_model("32cl","32cl",EDMUNDS_NEW+"2001/acura/clseries/32cl.html"); //parse_manufacturer("Honda",EDMUNDS_NEW+"2001/honda/"); xmlurls.close(); } private static void parse_new_cars() { String index_file=getURL.geturl(EDMUNDS_NEW,"main.html"); String line; StringTokenizer st,st2; boolean flag; String tmp,tmpurl,tmpmake; int i; try { BufferedReader br=new BufferedReader(new FileReader(index_file)); //parse index file listing all new car manufacturers while (br.ready()) { line=br.readLine(); if ( (line.startsWith("-1) ) { //first car on this line i=line.indexOf("",i)-1); tmpmake=line.substring(line.indexOf(">",i)+1,line.indexOf("<",i)); parse_manufacturer(tmpmake,tmpurl); //second car on this line i=line.indexOf("",i)-1); tmpmake=line.substring(line.indexOf(">",i)+1,line.indexOf("<",i)); parse_manufacturer(tmpmake,tmpurl); /* st=new StringTokenizer(line," "); flag=false; while ( st.hasMoreTokens() && (!flag) ) { tmp=st.nextToken(); if (tmp.equals(""); tmpmake=st2.nextToken(); tmpmake=st2.nextToken(); //parse data for this manufacturer parse_manufacturer(tmpmake,tmpurl); } } } */ } } br.close(); } catch (Exception e) { e.printStackTrace(); } } private static void parse_manufacturer(String make, String url) { String makefile=getURL.geturl(url,"tmp1"); String line,modelname="",trimname="",trimurl=""; StringTokenizer st; try { BufferedReader br=new BufferedReader(new FileReader(makefile)); //parse file listing all models of this make while (br.ready()) { line=br.readLine(); if ( (line.startsWith("")<0) line=br.readLine(); modelname=line.substring(line.indexOf("")+3, line.indexOf("")-1 ); //get the individual trims and parse those files while (!line.startsWith("")) { if (line.startsWith("
  • "); while (st.hasMoreTokens()) trimname=st.nextToken(); //parse file for this trim parse_model(modelname,trimname,trimurl); } line=br.readLine(); } } } br.close(); } catch (Exception e) { e.printStackTrace(); try { BufferedReader b=new BufferedReader(new InputStreamReader(System.in)); b.readLine(); }catch (Exception e2) {;} } } private static void parse_model2(String modelname,String trimname, String url) { System.out.println(modelname+","+trimname+","+url); } private static void parse_model(String modelname,String trimname, String url) { //System.out.println(modelname+","+trimname+","+url); String modelfile=getURL.geturl(url,"tmp2"); String line="",tmp,tmp2; StringTokenizer st; PrintWriter outxml; int i; try { BufferedReader br=new BufferedReader(new FileReader(modelfile)); tmp=slash2underscore(space2underscore(trimname))+".xml"; outxml=new PrintWriter(new FileWriter(tmp)); outxml.println("\n\n"); xmlurls.println(tmp); //parse file with info about this model //year of car st=new StringTokenizer(modelname," "); tmp=st.nextToken(); outxml.println(""+tmp+""); line=br.readLine(); //first: make, model, class, bodystyle, drivetype,where built while (!line.startsWith("Make:")) line=br.readLine(); //make tmp=line.substring(line.indexOf("")+4, line.indexOf(""+tmp+""); tmp2=tmp; //model line=br.readLine(); tmp=line.substring(line.indexOf("")+4, line.indexOf(""+tmp+""); tmp2=tmp2+" "+tmp; outxml.println(""+tmp2+""); //trim outxml.println(""+trimname+""); //class line=br.readLine(); tmp=line.substring(line.indexOf("")+4, line.indexOf(""+tmp+""); //bodystyle line=br.readLine(); tmp=line.substring(line.indexOf("")+4, line.indexOf(""+tmp+""); //drivetype line=br.readLine(); tmp=line.substring(line.indexOf("")+4, line.indexOf(""+tmp+""); //buildlocation line=br.readLine(); tmp=line.substring(line.indexOf("")+4, line.indexOf(""+tmp+""); //whatsnew while (!line.startsWith("")) line=br.readLine(); line=br.readLine(); st=new StringTokenizer(line,"<"); outxml.println(""+st.nextToken()+""); // pros/cons while (!line.startsWith("",7)+1, min( line.indexOf("<",line.indexOf(">",7)),line.length())); outxml.println(""+tmp+""); if (tmp.indexOf("not yet been compiled")>-1) { outxml.println(""+tmp+""); } else { line=br.readLine(); while (!line.startsWith("",7)+1, min( line.indexOf("<",line.indexOf(">",7)),line.length())); outxml.println(""+tmp+""); } //edmund's review while (!line.startsWith(""); while ( (!line.startsWith("")) && (!line.trim().equals("")) && (!line.trim().toLowerCase().equals("

    ")) && (!line.startsWith("a name")) ){ if (line.startsWith("

    ")||line.startsWith("

    ")) line=line.substring(3,line.length()); outxml.print(line+" "); line=br.readLine(); } outxml.println(""); //competing models line=br.readLine(); while (!line.startsWith("")) line=br.readLine(); while ( (!line.startsWith("")) && (!line.startsWith("-1) ){ i=line.indexOf("",i)+1,line.indexOf("<",i)); outxml.println(""+tmp+""); } else if (line.indexOf("has not yet been compiled")>-1) { tmp=line.substring(line.indexOf(">")+1, min(line.indexOf("<",2),line.length())); outxml.println(tmp); } line=br.readLine(); } //warranty while (!line.startsWith(""); while (!line.startsWith("")) { if (line.startsWith("

  • "); //specs while (!line.startsWith("")) line=br.readLine(); outxml.println(""); //while (!line.startsWith("")+4,line.length()).trim(); outxml.println(""+tmp+""); while ( (line.toLowerCase().indexOf("epa mileage")<0)&& (!line.startsWith(""); write_fueldata(br,outxml); outxml.println(""); while ( (line.toLowerCase().indexOf("range")<0)&& (!line.startsWith(""); write_fueldata(br,outxml); outxml.println(""); } } outxml.println(""); //insurance while (!line.startsWith("",i)+1,min(line.length(),line.indexOf("<",i))).trim(); outxml.println(""+tmp+""); //prices while (!line.startsWith("invoice")<0) line=br.readLine(); line=br.readLine(); i=line.indexOf("",i)+1,line.indexOf("<",i)).trim(); outxml.println(""+tmp+""); while (line.toLowerCase().indexOf("msrp")<0) line=br.readLine(); line=br.readLine(); i=line.indexOf("",i)+1,line.indexOf("<",i)).trim(); outxml.println(""+tmp+""); while (line.toLowerCase().indexOf("financing")<0) line=br.readLine(); line=br.readLine(); i=line.indexOf("",i)+1,line.indexOf("<",i)).trim(); outxml.println(""+tmp+""); while (line.toLowerCase().indexOf("destination charge")<0) line=br.readLine(); line=br.readLine(); i=line.indexOf("",i)+1,line.indexOf("<",i)).trim(); outxml.println(""+tmp+""); while (line.toLowerCase().indexOf("gas guzzler")<0) line=br.readLine(); line=br.readLine(); i=line.indexOf("",i)+1,line.indexOf("<",i)).trim(); outxml.println(""+tmp+""); while (line.toLowerCase().indexOf("dealer hold back")<0) line=br.readLine(); line=br.readLine(); i=line.indexOf("",i)+1,line.indexOf("<",i)).trim(); outxml.println(""+tmp+""); //standard equipment outxml.print(""); while (!line.startsWith("")) line=br.readLine(); while (!line.startsWith("")) line="\n"+line.substring(4,line.length()); if (line.startsWith("

    ")) line=line.substring(3,line.length()); if (line.startsWith("\n

    ")) line="\n"+line.substring(4,line.length()); if (line.endsWith("

    ")) line=line.substring(0,line.indexOf("

    ")); outxml.print(line); line=br.readLine(); } outxml.println(""); //options while ( (line.indexOf("OPTIONS CALCULATOR")<0) && (!line.startsWith("
    ")+3,line.indexOf("
    ")); outxml.println("
    ")); outxml.println(""+tmp+""); line=br.readLine(); while (line.indexOf("",i)+1,line.indexOf("<",i)).trim(); outxml.println(""+tmp+""); line=br.readLine(); while (line.indexOf("",i)+1,line.indexOf("<",i)).trim(); outxml.println(""+tmp+"\n"); } line=br.readLine(); } outxml.println(""); outxml.close(); br.close(); } catch (Exception e) { e.printStackTrace(); try { BufferedReader b=new BufferedReader(new InputStreamReader(System.in)); b.readLine(); }catch (Exception e2) {;} } } private static void parse_spec(String line, PrintWriter pw) { String tag,tmp; if (line.startsWith("
  • ")) { tag=line.substring(line.indexOf("")+3, min(line.indexOf(":"),line.indexOf("("))).trim(); tag=tag.replace(' ','_'); tag=tag.replace('-','_'); tag=tag.toLowerCase(); tmp=line.substring(line.indexOf("")+4,line.indexOf("
  • ")).trim(); pw.println("<"+tag+">"+tmp+""); } } private static void write_fueldata(BufferedReader br,PrintWriter pw) { String line,tmp; try { for (int i=0;i<2;i++) { line=br.readLine(); if (line.toLowerCase().indexOf("manual")>-1) { tmp=line.substring(line.indexOf("")+4,min(line.length(),line.indexOf("
    "))).trim(); pw.println(""+tmp+""); } else if (line.toLowerCase().indexOf("automatic")>-1) { tmp=line.substring(line.indexOf("")+4,min(line.length(),line.indexOf("
    "))).trim(); pw.println(""+tmp+""); } } } catch (IOException e) { e.printStackTrace(); } } private static void write_warranty(String line, PrintWriter pw) { String tmp,type; type=line.substring(line.indexOf("")+3,line.indexOf("")).trim(); tmp=line.substring(line.indexOf("")+4,line.indexOf("
    ")); if (type.toLowerCase().startsWith("basic")) { pw.print(""); pw.print(writewty_yr_miles(tmp)); pw.println(""); } else if (type.toLowerCase().startsWith("drivetrain")) { pw.print(""); pw.print(writewty_yr_miles(tmp)); pw.println(""); } else if (type.toLowerCase().startsWith("roadside")) { pw.print(""); pw.print(writewty_yr_miles(tmp)); pw.println(""); } else if (type.toLowerCase().startsWith("rust")) { pw.print(""); pw.print(writewty_yr_miles(tmp)); pw.println(""); } } private static String writewty_yr_miles(String s) { String tmp="",tmp2; if (s.indexOf("/")<0) { s=s.trim(); tmp=tmp+s+""+s+""; } else { tmp2=s.substring(0,s.indexOf("y")).trim(); tmp=tmp+tmp2+""; tmp2=s.substring(s.indexOf("/")+1,s.indexOf("mi.")).trim(); tmp=tmp+tmp2+""; } return tmp; } private static String space2underscore(String str) { return str.replace(' ','_'); } private static String slash2underscore(String str) { return str.replace('/','_'); } private static int min(int a,int b) { if (a<0) return b; if (b<0) return a; if (a