Compito per il marito: controlla la tesi che sto scrivendo, che non abbia copiato qualche frase senza accorgermene.
Svolgimento: beh, mica mi metto a fare dei copia e incolla; meglio cercare una soluzione automatizzata.
Per cui ecco un semplice programmino Java adatto allo scopo. Per farlo girare bisogna però scollegarsi e ricollegarsi a Internet, altrimenti Google si arrabbia e smette di fornire risposte.
/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package verificaarticoli;

import com.google.gson.Gson;
import com.google.gson.JsonIOException;
import com.google.gson.JsonSyntaxException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.net.URLEncoder;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Checks a Word document for sentences that can be found on the web.
 *
 * <p>The document is read paragraph by paragraph, each paragraph is split
 * into sentences on the '.' character, and every sentence longer than six
 * words is submitted to the Google AJAX web-search endpoint. Paragraphs,
 * sentences and the returned hits are written to an HTML report.
 *
 * <p>Instances keep mutable state (the current delay between requests and the
 * last IP address seen); nothing here is synchronized.
 *
 * @author diego
 */
public class test {

    private static final Logger LOG = Logger.getLogger(test.class.getName());

    /** Character set used for URL encoding and for decoding HTTP responses. */
    private static final String CHARSET = "UTF-8";

    /**
     * Safety cap on the number of hits appended per query. The request asks
     * for six results ({@code rsz=6}), so this is not expected to be reached.
     */
    private static final int MAX_RESULTS = 102;

    /** Path of the .docx document to analyse. */
    String sFileOriginal = "C:\\tmp\\crar\\INTRODUCTION 15112.docx";

    /** Path of the HTML report to write. */
    String sFileDest = "C:\\tmp\\crar\\INTRODUCTION 15112.html";

    /** Document loaded by {@link #apri()}. */
    org.apache.poi.xwpf.usermodel.XWPFDocument doc;

    /** Pause in milliseconds before each search request; adapted at run time. */
    int sleep = 1000;

    /**
     * 1-based index of the first paragraph to process. A value greater than
     * zero also means "resume": the report file is appended to instead of
     * being overwritten.
     */
    int START = 146;

    /** Last IP address obtained by {@link #getMyIP()}; empty until the first success. */
    String sOldIp = "";

    /**
     * Loads {@link #sFileOriginal} into {@link #doc}.
     *
     * @throws IOException if the file cannot be opened or parsed
     */
    public void apri() throws IOException {
        java.io.FileInputStream fis = new java.io.FileInputStream(sFileOriginal);
        try {
            doc = new org.apache.poi.xwpf.usermodel.XWPFDocument(fis);
        } finally {
            fis.close();
        }
    }

    /**
     * Opens the document, searches every sufficiently long sentence and
     * writes the HTML report to {@link #sFileDest}.
     *
     * <p>The report is flushed after every sentence so that partial output
     * is on disk if the run is aborted and later resumed via {@link #START}.
     *
     * @throws IOException if the document or the report file cannot be opened
     */
    public void elabora() throws IOException {
        apri();
        boolean resuming = START > 0;
        java.io.PrintStream ps = new java.io.PrintStream(
                new java.io.FileOutputStream(sFileDest, resuming));
        try {
            // The opening tags belong at the top of a fresh report only; when
            // appending, the existing file is expected to contain them already.
            if (!resuming) {
                ps.print("<html><body>");
            }
            int iPar = 0;
            for (org.apache.poi.xwpf.usermodel.XWPFParagraph p : doc.getParagraphs()) {
                ++iPar;
                if (iPar < START) {
                    continue;
                }
                String s = p.getText();
                System.out.println("PARAGRAFO " + s);
                ps.print("<b>Paragrafo" + iPar + "</b><p>" + escapeHtml(s) + "</p>");

                java.util.StringTokenizer st = new java.util.StringTokenizer(s, ".");
                int iFrase = 0;
                while (st.hasMoreTokens()) {
                    String text = (st.nextToken() + ".").trim();
                    System.out.println("Frase " + text);
                    // Very short fragments are not searched.
                    if (text.split(" ").length > 6) {
                        ps.print("<b>Frase " + iPar + "." + (++iFrase) + " </b><p>");
                        ps.print(escapeHtml(text));
                        ps.print("</p>");
                        ps.print("<b>Risultato</b><p>");
                        ps.print(cercaInGoogle(text));
                        ps.println("</p>");
                    }
                    ps.flush();
                }
            }
            ps.print("</body></html>");
        } finally {
            // Closing the PrintStream also flushes and closes the file stream.
            ps.close();
        }
    }

    public static void main(String[] args) throws Exception {
        test t = new test();
        t.elabora();
    }

    /**
     * Searches for {@code search} and returns the hits as an HTML fragment.
     *
     * @param search the text to look up
     * @return the HTML produced by {@link #cercaInGoogle(String, StringBuffer)}
     */
    public String cercaInGoogle(String search) {
        StringBuffer sbRet = new StringBuffer(2000);
        cercaInGoogle(search, sbRet);
        return sbRet.toString();
    }

    /**
     * Asks http://www.whatismyip.org/ for the caller's address.
     *
     * <p>Best effort: on any I/O error, on an empty response, or when the
     * first response line is exactly "Too frequent!", the previously known
     * address ({@link #sOldIp}) is returned instead.
     *
     * @return the first line of the response, or {@link #sOldIp} as fallback
     */
    public String getMyIP() {
        String sIp = sOldIp;
        try {
            java.net.URL ipUrl = new java.net.URL("http://www.whatismyip.org/");
            java.net.HttpURLConnection conn = (java.net.HttpURLConnection) ipUrl.openConnection();
            conn.setConnectTimeout(500);
            conn.setReadTimeout(1000);
            java.io.InputStream in = conn.getInputStream();
            try {
                java.io.BufferedReader br = new java.io.BufferedReader(
                        new java.io.InputStreamReader(in, CHARSET));
                String line = br.readLine();
                if (line != null && !"Too frequent!".equals(line)) {
                    sIp = line;
                }
            } finally {
                in.close();
            }
        } catch (IOException ignored) {
            // Deliberately ignored: the lookup is optional and the last known
            // address is an acceptable substitute.
        }
        return sIp;
    }

    /**
     * Searches for {@code search} and appends one link plus snippet per hit
     * to {@code sbRet}.
     *
     * <p>The method keeps retrying until a response carrying response data is
     * received: I/O errors and malformed JSON are logged and retried, and a
     * response status of 403 lengthens the pause between requests. After a
     * successful response the pause is shortened again. If the thread is
     * interrupted while pausing, the interrupt flag is restored and the
     * method returns without appending anything.
     *
     * @param search the text to look up
     * @param sbRet  buffer receiving the HTML fragment
     */
    public void cercaInGoogle(String search, StringBuffer sbRet) {
        GoogleResults results = null;
        while (results == null || results.getResponseData() == null) {
            results = null;
            try {
                results = queryGoogle(search);
            } catch (InterruptedException ex) {
                Thread.currentThread().interrupt();
                return;
            } catch (IOException ex) {
                LOG.log(Level.WARNING, "Ricerca fallita, riprovo: " + ex);
                continue;
            } catch (JsonSyntaxException ex) {
                LOG.log(Level.WARNING, "Risposta non valida, riprovo: " + ex);
                continue;
            }
            if (results != null && results.getResponseStatus() == 403) {
                sleep = (int) (sleep * 1.5);
                System.out.println("Sospetto porto attesa a " + sleep);
            }
        }

        if (sleep > 50) {
            sleep = (int) (sleep * 0.8);
            System.out.println("Provo ad abbassare porto attesa a " + sleep);
        }

        if (results.getResponseData().getResults() == null) {
            return;
        }
        int remaining = MAX_RESULTS;
        for (GoogleResults.Result r : results.getResponseData().getResults()) {
            // Title and content are appended as received from the search service.
            sbRet.append("<a href=\"").append(r.getUrl())
                    .append("\" target=\"_blank\" >").append(r.getTitle()).append("</a><br>");
            sbRet.append(r.getContent()).append("<br>");
            if (--remaining <= 0) {
                break;
            }
        }
    }

    /**
     * Performs a single search request after pausing for {@link #sleep} ms.
     *
     * @param search the text to look up
     * @return the parsed response; may be {@code null} if the body was empty
     * @throws IOException          on connection, timeout or read failure
     * @throws InterruptedException if the thread is interrupted while pausing
     */
    private GoogleResults queryGoogle(String search) throws IOException, InterruptedException {
        String ip = getMyIP();
        if (!sOldIp.equals(ip)) {
            System.out.println("Indirizzo cambiato da " + sOldIp + " a " + ip);
            sOldIp = ip;
        }

        Thread.sleep(sleep);

        // The address comes from a remote service, so it is encoded like any
        // other query parameter before being placed in the URL.
        URL url = new URL("http://ajax.googleapis.com/ajax/services/search/web?v=1.0&userip="
                + URLEncoder.encode(ip, CHARSET)
                + "&rsz=6&hl=it&q="
                + URLEncoder.encode(search, CHARSET));
        java.net.HttpURLConnection conn = (java.net.HttpURLConnection) url.openConnection();
        conn.setConnectTimeout(500);
        conn.setReadTimeout(1000);

        String body;
        java.io.InputStream in = conn.getInputStream();
        try {
            body = readAll(new InputStreamReader(in, CHARSET));
        } finally {
            in.close();
        }
        System.out.println(body);
        return new Gson().fromJson(body, GoogleResults.class);
    }

    /** Reads everything from {@code reader} into a string. */
    private static String readAll(Reader reader) throws IOException {
        StringBuilder sb = new StringBuilder();
        char[] cbuf = new char[1024];
        int len;
        while ((len = reader.read(cbuf)) > 0) {
            sb.append(cbuf, 0, len);
        }
        return sb.toString();
    }

    /** Escapes the characters that are significant in HTML text. */
    private static String escapeHtml(String s) {
        StringBuilder out = new StringBuilder(s.length() + 16);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '&':
                    out.append("&amp;");
                    break;
                case '<':
                    out.append("&lt;");
                    break;
                case '>':
                    out.append("&gt;");
                    break;
                case '"':
                    out.append("&quot;");
                    break;
                default:
                    out.append(c);
            }
        }
        return out.toString();
    }
}