1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
/**
 * Insert one crawled image record into the Cassandra `images` table.
 *
 * The table and its SASI indexes are created on demand (all DDL statements use
 * IF NOT EXISTS, so repeated calls are harmless), then a single prepared INSERT
 * is executed asynchronously and awaited for at most 10 seconds.
 *
 * Uses the global `$con` session object (execute / prepare / executeAsync).
 *
 * NOTE(review): the values are bound as given. Columns declared as bigint,
 * float or timestamp may require the driver's own value types for binding —
 * confirm against the callers.
 *
 * @param mixed $url                   Stored in `siteUrl` (primary key).
 * @param mixed $src                   Stored in `imageUrl`.
 * @param mixed $alt                   Stored in `alt`.
 * @param mixed $title                 Stored in `title`.
 * @param mixed $description           Stored in `description`.
 * @param mixed $keywords              Stored in `keywords`.
 * @param mixed $htmlContentToTxt      Stored in `textFromWebPage`.
 * @param mixed $urlClicked            Stored in `clicks` (bigint column).
 * @param mixed $urlBroken             Stored in `broken` (int column).
 * @param mixed $lang                  Stored in `site_lang`.
 * @param mixed $width                 Stored in `width_of_image` (float column).
 * @param mixed $height                Stored in `height_of_image` (float column).
 * @param mixed $type                  Stored in `image_type`.
 * @param mixed $imageExtension        Stored in `image_extension`.
 * @param mixed $attr                  Stored in `image_attribute`.
 * @param mixed $providerName          Stored in `providerName`.
 * @param mixed $cmsUsed               Stored in `cmsUsed`.
 * @param mixed $authorName            Stored in `authorName`.
 * @param mixed $authorUrl             Stored in `authorUrl`.
 * @param mixed $publishedTime         Stored in `publishedTime` (timestamp column).
 * @param mixed $centroidScore         Stored in `centroidScore` (float column).
 * @param mixed $graphBasedScore       Stored in `graphBasedScore` (float column).
 * @param mixed $scrapeScore           Stored in `scrapeScore` (float column).
 * @param mixed $centroidWeightedScore Stored in `centroidWeightedScore` (float column).
 * @param mixed $crawledDate           Stored in `created_date` (timestamp column).
 */
function insertImage($url, $src, $alt, $title, $description, $keywords, $htmlContentToTxt, $urlClicked, $urlBroken, $lang, $width, $height, $type, $imageExtension, $attr, $providerName, $cmsUsed, $authorName, $authorUrl, $publishedTime, $centroidScore, $graphBasedScore, $scrapeScore, $centroidWeightedScore, $crawledDate) {
    global $con;

    // Create the images table if it does not exist yet.
    // NOTE(review): caching='ALL' looks like the pre-2.1 string form of the
    // caching option; newer Cassandra versions expect a map — confirm against
    // the target cluster version.
    $con->execute("CREATE TABLE IF NOT EXISTS images(siteUrl varchar PRIMARY KEY, imageUrl varchar, alt varchar, title varchar, description varchar,
        keywords varchar, textFromWebPage varchar, clicks bigint, broken int, site_lang varchar, width_of_image float,
        height_of_image float, image_type varchar, image_extension varchar, image_attribute varchar, providerName varchar,
        cmsUsed varchar, authorName varchar, authorUrl varchar, publishedTime timestamp, centroidScore float,
        graphBasedScore float, scrapeScore float, centroidWeightedScore float, created_date timestamp) WITH caching='ALL';");

    // SSTable Attached Secondary Indexes (SASI) enable "LIKE" queries
    // (https://docs.datastax.com/en/dse/5.1/cql/cql/cql_using/useSASIIndex.html).
    // A SASI index targets a single column, so one index is created per
    // searchable text column. siteUrl is the partition key and is therefore
    // not given a secondary index. IF NOT EXISTS keeps repeated calls from
    // failing once the indexes are in place.
    $likeSearchableColumns = ['alt', 'title', 'keywords', 'description', 'textFromWebPage'];
    foreach ($likeSearchableColumns as $column) {
        $indexName = 'images_prefix_' . strtolower($column);
        $con->execute(
            "CREATE CUSTOM INDEX IF NOT EXISTS {$indexName} ON images({$column}) "
            . "USING 'org.apache.cassandra.index.sasi.SASIIndex'"
        );
    }

    $query = $con->prepare("INSERT INTO images(siteUrl, imageUrl, alt, title, description, keywords, textFromWebPage, clicks, broken, site_lang, width_of_image, height_of_image, image_type, image_extension, image_attribute, providerName, cmsUsed, authorName, authorUrl, publishedTime, centroidScore, graphBasedScore, scrapeScore, centroidWeightedScore, created_date)
        VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");

    // Positional values, in the same order as the column list of the INSERT.
    $data = [
        $url, $src, $alt, $title, $description, $keywords, $htmlContentToTxt, $urlClicked, $urlBroken, $lang, $width, $height,
        $type, $imageExtension, $attr, $providerName, $cmsUsed, $authorName, $authorUrl, $publishedTime,
        $centroidScore, $graphBasedScore, $scrapeScore, $centroidWeightedScore, $crawledDate,
    ];

    // Execute the INSERT once, binding all 25 values together. Running it once
    // per value would bind a single scalar to a 25-placeholder statement.
    $future = $con->executeAsync($query, ['arguments' => $data]);

    // Wait for the write to complete, but never longer than 10 seconds.
    $future->get(10);
}