blogger開発講座: DMM コミック広告取得処理

この処理については、詳しい説明はしません。
内容は、ご自身でソースを解析するなどして理解して下さい。
わからない方は、そういうものだと考えてお使い下さい。
この中の処理は、実際にコミックページのhtml を解析し、必要な情報を取得するのに必要なキーワードを見つけ、必要情報を取得してデータを作成する処理です。
それを説明するのは、サイトに失礼ですし、やるべきでは無いと考えます。
サイトの各ページの癖を解析し、記載ルールを調査して作成しました。
以下にソースをのせます。

<!DOCTYPE html>
<html>

<head>
<title>DMM広告作成</title>

</head>
<body>
<h1>
DMM広告作成</h1>
<?php
// Default output CSV file name.
$OutoFile = 'comic.csv';

/**
 * Download the document at the given URL with cURL and return its body.
 *
 * Only http:// and https:// are allowed: the URL may originate from request
 * input, and cURL would otherwise also fetch file:// and other schemes.
 *
 * @param string $path URL to fetch.
 * @return string Response body, or "" when the transfer could not be completed.
 */
function getFileMain($path){
    $cp = curl_init();
    if ($cp === false) {
        return "";
    }

    curl_setopt($cp, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($cp, CURLOPT_URL, $path);
    curl_setopt($cp, CURLOPT_TIMEOUT, 60);
    curl_setopt($cp, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

    // curl_exec() yields false on failure; normalise that to an empty string
    // so callers can keep treating the result as text.
    $html = curl_exec($cp);

    return is_string($html) ? $html : "";
}

/**
 * Extract the text that follows the element carrying id="title".
 *
 * The text is taken from just after the marker and the single character that
 * follows it (the ">" closing the tag) up to the next "<".
 *
 * @param string $html Page source to scan.
 * @return string The title text, or "" when the marker or the following "<" is missing.
 */
function getTitle($html){
    $html = (string) $html;
    $marker = "id=\"title\"";

    // strpos() returns false when absent and 0 for a match at the very start,
    // so the result must be compared against false explicitly.
    $pos = strpos($html, $marker);
    if ($pos === false) {
        return "";
    }

    $start = $pos + strlen($marker) + 1;
    $end = strpos($html, "<", $pos);
    if ($end === false || $end < $start) {
        return "";
    }

    return substr($html, $start, $end - $start);
}

/**
 * Extract the release date from the page source.
 *
 * Looks for the label "電子書籍販売日" and returns the content of the first
 * <span> after it. When that label is absent, looks for "配信開始日" and
 * returns the content of the first <td> after it with line breaks removed.
 *
 * @param string $html Page source to scan.
 * @return string The date text, or "" when no label or enclosing tag pair is found.
 */
function getcDate($html){
    $html = (string) $html;

    // Compare against false: a label at offset 0 is still a match.
    $pos = strpos($html, "電子書籍販売日");
    if ($pos !== false) {
        return getcDateTagText($html, $pos, "<span>", "</span>");
    }

    $pos = strpos($html, "配信開始日");
    if ($pos !== false) {
        $sdate = getcDateTagText($html, $pos, "<td>", "</td>");
        return str_replace(array("\r\n", "\r", "\n"), '', $sdate);
    }

    return "";
}

/**
 * Return the text between the first $open found at or after $from and the
 * $close that follows it, or "" when either delimiter is missing.
 */
function getcDateTagText($html, $from, $open, $close){
    $start = strpos($html, $open, $from);
    if ($start === false) {
        return "";
    }
    $start += strlen($open);

    $end = strpos($html, $close, $start);
    if ($end === false) {
        return "";
    }

    return substr($html, $start, $end - $start);
}

/**
 * Extract the href of the first link after the "l-areaDetailMainContent" marker.
 *
 * @param string $html Page source to scan.
 * @return string The href value, or "" when the marker, the link, or its
 *                closing quote cannot be found.
 */
function getpic($html){
    $html = (string) $html;

    // Compare against false: a marker at offset 0 is still a match.
    $pos = strpos($html, "l-areaDetailMainContent");
    if ($pos === false) {
        return "";
    }

    $open = "<a href=\"";
    $start = strpos($html, $open, $pos);
    if ($start === false) {
        return "";
    }
    $start += strlen($open);

    $end = strpos($html, "\"", $start);
    if ($end === false) {
        return "";
    }

    return substr($html, $start, $end - $start);
}

/**
 * Extract the href of the link whose text is "立ち読み".
 *
 * Finds ">立ち読み<" and takes the last <a href="..."> that opens before it.
 *
 * @param string $html Page source to scan.
 * @return string The href value, or "" when the label, a preceding link, or
 *                the closing quote cannot be found.
 */
function getTatiyomi($html){
    $html = (string) $html;

    $labelPos = strpos($html, ">立ち読み<");
    if ($labelPos === false) {
        return "";
    }

    // Search backwards within the text preceding the label. Compare against
    // false so that an anchor starting at offset 0 is accepted.
    $open = "<a href=\"";
    $start = strrpos(substr($html, 0, $labelPos), $open);
    if ($start === false) {
        return "";
    }
    $start += strlen($open);

    $end = strpos($html, "\"", $start);
    if ($end === false) {
        return "";
    }

    return substr($html, $start, $end - $start);
}

/*
 * Request handling for the ad-registration page.
 *
 * The page is a small state machine driven by which request parameters are
 * present:
 *   title      -> append the confirmed record to the CSV file
 *   saito_uri  -> fetch the page, show the extracted values, offer to save
 *   tgt        -> build the output path and ask for the page URL
 *   OutFile    -> ask for the next page URL
 *   (none)     -> choose the target folder and the record type
 *
 * Every request-supplied or scraped value is HTML-escaped before being
 * printed, and the output path and fetch URL are validated before use.
 */

/** Return the request parameter $key as a string, or "" when absent or not scalar. */
function comicParam($key){
    if (isset($_REQUEST[$key]) && is_scalar($_REQUEST[$key])) {
        return (string) $_REQUEST[$key];
    }
    return "";
}

/** Escape a value for HTML text or a quoted attribute (assumes UTF-8 text; confirm against the page encoding). */
function comicEsc($text){
    return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

/**
 * Accept only output paths of the shape this page itself generates:
 * "<folder>/comic.csv" or "<folder>/book.csv" with no ".." component.
 */
function comicIsAllowedOutFile($path){
    if ($path === "" || strpos($path, "\0") !== false) {
        return false;
    }
    $parts = explode("/", str_replace("\\", "/", $path));
    if (in_array("..", $parts, true)) {
        return false;
    }
    return in_array(end($parts), array("comic.csv", "book.csv"), true);
}

/** Remove line breaks so that one record always occupies exactly one CSV line. */
function comicCsvField($value){
    return str_replace(array("\r\n", "\r", "\n"), '', (string) $value);
}

/** Append one line to the output file; returns false when it cannot be written. */
function comicAppendLine($outFile, $line){
    // Mode "a" creates the file when it does not exist yet.
    $file = fopen($outFile, "a");
    if ($file === false) {
        return false;
    }
    $ok = fwrite($file, $line) !== false;
    fclose($file);
    return $ok;
}

/** Tell whether a line of the output file already contains the title (and the description, when there is one). */
function comicIsRegistered($outFile, $title, $story){
    $title = comicCsvField($title);
    $story = comicCsvField($story);
    if ($title === "" || !is_file($outFile)) {
        return false;
    }
    $file = fopen($outFile, "r");
    if ($file === false) {
        return false;
    }
    $found = false;
    while (($line = fgets($file)) !== false) {
        if (strpos($line, $title) === false) {
            continue;
        }
        if ($story !== "" && strpos($line, $story) === false) {
            continue;
        }
        $found = true;
        break;
    }
    fclose($file);
    return $found;
}

/** Markup of the "back to menu" button. */
function comicMenuButton(){
    return "<input type=\"button\" onclick=\"window.location.href='./index.php';\" value=\"メニューに帰る\" />  <br><br>";
}

/** Markup of the hidden field that carries the output path to the next step. */
function comicOutFileField($outFile){
    return "<input type='hidden' name='OutFile' value='".comicEsc($outFile)."'>";
}

/** Show the values extracted from the fetched page and offer to save them. */
function comicRenderPreview($html, $outFile, $siteUrl){
    // Title
    $st = (string) getTitle($html);
    if ($st === "") {
        print "タイトル取得エラー<br>";
        return;
    }
    print "タイトル:".comicEsc($st)."<br>";

    // Release date
    $sdate = (string) getcDate($html);
    if ($sdate === "") {
        print "電子書籍販売日エラー<br>";
        return;
    }
    print "電子書籍販売日:".comicEsc($sdate)."<br>";

    // Cover image
    $spic = (string) getpic($html);
    if ($spic === "") {
        print "画像取得エラー<br>";
        return;
    }
    print "画像URL:".comicEsc($spic)."<br>";
    print "<img src=\"".comicEsc($spic)."\"><br>";

    // A missing trial-reading link or description is shown as empty rather
    // than treated as an error.
    $sturl = (string) getTatiyomi($html);
    print "立ち読みURL:".comicEsc($sturl)."<br>";
    $story = (string) getStory($html);
    print "説明:".comicEsc($story)."<br>";

    print "<form name='frm1' action='comic.php' method='POST'>\n";
    if (comicIsRegistered($outFile, $st, $story)) {
        print comicOutFileField($outFile);
        print "上記内容は登録済みです。<br>続けて登録しますか？<br><br>";
        print "<input type='submit' style='margin: 3px;width:150px' class='button_example' value='登録'>";
    } else {
        print "上記内容を保存しますか？<br><br>";
        print "<input type='hidden' name='title' value='".comicEsc($st)."'>";
        print "<input type='hidden' name='pic' value='".comicEsc($spic)."'>";
        print "<input type='hidden' name='jurl' value='".comicEsc($sturl)."'>";
        print "<input type='hidden' name='story' value='".comicEsc($story)."'>";
        print comicOutFileField($outFile);
        print "<input type='hidden' name='saito_uri' value='".comicEsc($siteUrl)."'>";
        print "<input type='hidden' name='sdate' value='".comicEsc($sdate)."'>";
        print "<input type='submit' style='margin: 3px;width:150px' class='button_example' value='保存'>";
    }
    print "</form>\n";
    print "<br><br>".comicMenuButton();
}

if (isset($_REQUEST['title'])) {
    // Step: save the confirmed record.
    $outFile = comicParam('OutFile');
    if (!comicIsAllowedOutFile($outFile)) {
        print "出力ファイルの指定が不正です<br><br>";
        print comicMenuButton();
    } else {
        $fields = array(
            comicParam('title'),
            comicParam('pic'),
            comicParam('jurl'),
            comicParam('story'),
            comicParam('saito_uri'),
            comicParam('sdate'),
        );
        $ss = implode(",", array_map('comicCsvField', $fields))."\n";
        print comicEsc($ss)."<br>";
        if (comicAppendLine($outFile, $ss)) {
            print "終了<br><br><br>";
        } else {
            print "保存に失敗しました<br><br><br>";
        }

        print "<form name='frm1' action='comic.php' method='POST'>\n";
        print comicOutFileField($outFile);
        print "<input type='submit' style='margin: 3px;width:150px' class='button_example' value='続けて登録'><br><br><br><br>";
        print "</form>\n";
        print comicMenuButton();
    }
} else if (isset($_REQUEST['saito_uri'])) {
    // Step: fetch the page and preview the extracted values.
    $outFile = comicParam('OutFile');
    $siteUrl = comicParam('saito_uri');
    if (!comicIsAllowedOutFile($outFile)) {
        print "出力ファイルの指定が不正です<br><br>";
        print comicMenuButton();
    } else if (!preg_match('#^https?://#i', $siteUrl)) {
        // Only web pages may be fetched; other schemes would let the request
        // read arbitrary resources through cURL.
        print "サイトURLが不正です<br><br>";
        print comicMenuButton();
    } else {
        if (!is_file($outFile)) {
            $file = fopen($outFile, "w");
            if ($file !== false) {
                fclose($file);
            }
        }
        $html = (string) getFileMain($siteUrl);
        // Keep a copy of the fetched page for debugging extraction errors.
        file_put_contents("test.html", $html);
        comicRenderPreview($html, $outFile, $siteUrl);
    }
} else if (isset($_REQUEST['tgt'])) {
    // Step: build the output path from the chosen folder and record type.
    if (comicParam('type') == "コミック") {
        $OutoFile = "comic.csv";
    } else {
        $OutoFile = "book.csv";
    }
    $fpath = comicParam('tgt')."/".$OutoFile;
    print "出力ファイル：".comicEsc($fpath)."<br><br>";
    print "<form name='frm1' action='comic.php' method='POST'>\n";
    print "サイトURL<input type='url' name='saito_uri' style='width:200px;' ><br><br>";
    print comicOutFileField($fpath);
    print "<input type='submit' style='margin: 3px;width:150px' class='button_example' value='実行'><br><br><br><br>";
    print comicMenuButton();
    print "</form>\n";
} else if (isset($_REQUEST['OutFile'])) {
    // Step: ask for the next page URL, keeping the same output file.
    $outFile = comicParam('OutFile');
    print "出力ファイル：".comicEsc($outFile)."<br><br>";
    print "<form name='frm1' action='comic.php' method='POST'>\n";
    print "サイトURL<input type='url' name='saito_uri' style='width:200px;' ><br><br>";
    print comicOutFileField($outFile);
    print "<input type='submit' style='margin: 3px;width:150px' class='button_example' value='実行'>";
    print "</form>\n";
    print "<br><br><br><br>".comicMenuButton();
} else {
    // Initial step: choose the target folder and the record type.
    $result = list_files("./");
    print "<form name='frm1' action='comic.php' method='POST'>\n";
    print "<br>";
    print "<select id='selectmenu' name='tgt'>";
    foreach ((array) $result as $entry) {
        print "<option value=\"".comicEsc($entry)."\">".comicEsc($entry)."</option>";
    }
    print "</select>";
    print "<br>";
    print "<br>";
    print "<select id='selectmenu' name='type'>";
    print "<option value=\"コミック\">コミック</option>";
    print "<option value=\"文庫\">文庫</option>";
    print "</select>";
    print "<br>";
    print "<br>";
    print "<input type='submit' style='margin: 3px;width:150px' class='button_example' value='実行'><br><br> ";
    print comicMenuButton();
    print "</form>\n<br><br>";
}
?>
</body>
</html>

いかがでしょうか？
結構大きいプログラムとなってます。
取得される情報は、タイトル、画像URL、立ち読み先リンクURL、コミックページURL、コミック内容などです。実際にサイトで使用する時は、自分のDMM アフィリエイトID を加えて使用する必要があります。
getFileMain
この処理は、file_get_contents関数を作り直した処理です。結果は同じになります。
file_get_contents関数は、制限があり、携帯php では使えますが、レンタルサーバーでは、制限に引っかかって使えない事が多いので、ネットで調べて作成しました。
この処理は、他のサーバーのファイル情報をテキストに全て読み込みます。
この処理によりDMM サイトのコミックページの内容を全て読み込み解析し、広告に必要な情報を抽出してファイルに保存します。
この処理は、実行すると、test.html ファイルを保存します。
これは、実際に読み込みしたDMM サイトの情報を保存した物です。
取得エラーが出た時の確認用です。デバッグに使用します。
次は、ftp のファイルアップを公開しますね！

menu

2017年4月29日土曜日

DMM コミック広告取得処理

0 件のコメント:

コメントを投稿