Built a robots.txt tester tool

When a search engine robot visits a website, it first tries to fetch the site's robots.txt file to learn which URLs on the site it is allowed to crawl and which it is not.
robot_txt_checker.php
<!DOCTYPE html>
<html>
<head>
<meta charset = "utf-8"/>
<title>robots.txt tester tool</title>
<style>
body {
    font-family: 'Merriweather', serif;
    font-size:16px;
}

h1 {
    font-size:16px;
}

th {
    font-size:15px;
}

table, td, th {
    border: 1px solid #ddd;
    text-align: left;
}

table {
    border-collapse: collapse;
    width: 100%;
}

th, td {
    padding: 15px;
}
</style>
</head>
<body>
<div><h1>robots.txt tester tool</h1></div>
<div>
    <div>
        <form action="robot_txt_checker.php" method="post">
            <input type="text" name="url" placeholder="https://www.agloberry.com" value=""/>
            <input type="submit" name="submit" value="Submit"/>
        </form>
    </div>
    <div style="margin-top:10px;">
<?php
// Parsed robots.txt data. $userAgent is a list of agent names; the other three
// arrays are keyed by the agent's index in $userAgent and hold that agent's
// Allow paths, Disallow paths and Sitemap URLs respectively.
$userAgent = array();
$allow = array();
$disallow = array();
$siteMap = array();

if (isset($_POST["submit"]) && isset($_POST["url"]) && is_string($_POST["url"])) {
    $robotFileLocation = buildRobotsTxtLocation(trim($_POST["url"]));

    if ($robotFileLocation === null) {
        echo "<div style='color:red;'>Please enter valid URL</div>";
    } else {
        $robotFileContent = fetchRobotsTxt($robotFileLocation);

        if ($robotFileContent === null) {
            echo "<div style='color:red;'>robots.txt file not found</div>";
        } else {
            // A UTF-8 byte-order mark would otherwise become part of the first directive name.
            if (strncmp($robotFileContent, "\xEF\xBB\xBF", 3) === 0) {
                $robotFileContent = substr($robotFileContent, 3);
            }

            // Lower-cased directive name => keyword understood by update().
            $directiveKeywords = array(
                'allow'    => 'Allow:',
                'disallow' => 'Disallow:',
                'sitemap'  => 'Sitemap:',
            );
            $groupKeys = array();   // indexes (into $userAgent) of the agents the current rules apply to
            $readingAgents = false; // true while consecutive User-agent lines are still extending the group

            foreach (preg_split('/\r\n|\r|\n/', $robotFileContent) as $robotLine) {
                $record = parseRobotsLine($robotLine);
                if ($record === null) {
                    continue;
                }
                list($directive, $value) = $record;

                if ($directive === 'user-agent') {
                    if ($value === '') {
                        continue;
                    }
                    // The first User-agent line after a rule starts a new group;
                    // further consecutive User-agent lines join that group.
                    if (!$readingAgents) {
                        $groupKeys = array();
                        $readingAgents = true;
                    }
                    // Strict search: index 0 is a valid hit and must not be confused with "not found".
                    $key = array_search($value, $userAgent, true);
                    if ($key === false) {
                        $userAgent[] = $value;
                        $key = count($userAgent) - 1;
                        $allow[$key] = array();
                        $disallow[$key] = array();
                        $siteMap[$key] = array();
                    }
                    if (!in_array($key, $groupKeys, true)) {
                        $groupKeys[] = $key;
                    }
                    continue;
                }

                if (!isset($directiveKeywords[$directive])) {
                    continue; // directive this tool does not display (e.g. Crawl-delay)
                }

                // An Allow/Disallow line closes the run of User-agent lines even when its
                // value is empty; a Sitemap line does not take part in grouping.
                if ($directive !== 'sitemap') {
                    $readingAgents = false;
                }
                if ($value === '') {
                    continue; // nothing to list for an empty rule
                }

                // Rules seen before any User-agent line have no row to be shown in and are dropped.
                foreach ($groupKeys as $key) {
                    update($directiveKeywords[$directive], $key, $value);
                }
            }
        }
    }
}

if (count($userAgent) >= 1) {
?>
        <table width="100%">
            <tr>
                <th valign='top'>S.No</th>
                <th valign='top'>User Agent</th>
                <th valign='top'>Allow Path</th>
                <th valign='top'>Disallow Path</th>
                <th valign='top'>Sitemap</th>
            </tr>
<?php
    foreach ($userAgent as $agentIndex => $agentName) {
        echo "<tr>";
        echo "<td valign='top'>".($agentIndex + 1)."</td>";
        echo "<td valign='top'>".escapeHtml($agentName)."</td>";
        // Column order matches the header row: Allow, Disallow, Sitemap.
        foreach (array($allow, $disallow, $siteMap) as $column) {
            $entries = isset($column[$agentIndex]) ? $column[$agentIndex] : array();
            echo "<td valign='top'>";
            foreach ($entries as $entry) {
                echo escapeHtml($entry)."<br/>";
            }
            echo "</td>";
        }
        echo "</tr>";
    }
?>
        </table>
<?php
}
?>
    </div>
</div>
<div>
    <p>Created by <a href = "https://www.algoberry.com" target="_blank">Algoberry</a></p>
</div>
</body>
</html>

<?php
/**
 * Record one rule value for a user agent in the global result arrays.
 *
 * @param string      $robotKeyword "Disallow:", "Allow:" or "Sitemap:"; any other keyword is ignored.
 * @param int         $key          Index of the user agent in the global $userAgent list.
 * @param string|null $value        Value to store. When omitted, the global $wordBuffer is used,
 *                                  which keeps the original two-argument call working.
 */
function update($robotKeyword, $key, $value = null) {
    global $disallow, $allow, $siteMap, $wordBuffer;

    if ($value === null) {
        $value = $wordBuffer;
    }

    if ($robotKeyword === "Disallow:") {
        $disallow[$key][] = $value;
    } else if ($robotKeyword === "Allow:") {
        $allow[$key][] = $value;
    } else if ($robotKeyword === "Sitemap:") {
        $siteMap[$key][] = $value;
    }
}

/**
 * Build the robots.txt location for the site a user-supplied URL points at.
 *
 * Only http and https URLs with a host are accepted, so other stream wrappers
 * (file://, ftp://, ...) are never passed on to fopen(). An explicit port is kept.
 * NOTE: this does not stop requests to internal hosts; restrict that separately
 * if the tool is exposed publicly.
 *
 * @param string $url URL typed into the form.
 * @return string|null Absolute robots.txt URL, or null when $url is not usable.
 */
function buildRobotsTxtLocation($url) {
    if (filter_var($url, FILTER_VALIDATE_URL) === false) {
        return null;
    }

    $parts = parse_url($url);
    if ($parts === false || !isset($parts['scheme'], $parts['host'])) {
        return null;
    }

    $scheme = strtolower($parts['scheme']);
    if ($scheme !== 'http' && $scheme !== 'https') {
        return null;
    }

    $location = $scheme.'://'.$parts['host'];
    if (isset($parts['port'])) {
        $location .= ':'.$parts['port'];
    }

    return $location.'/robots.txt';
}

/**
 * Download a robots.txt file.
 *
 * @param string $robotFileLocation Absolute http(s) URL of the file.
 * @return string|null File content (at most 512000 bytes), or null when it cannot be read.
 */
function fetchRobotsTxt($robotFileLocation) {
    // The "http" context options are used by the https wrapper as well.
    $context = stream_context_create(array(
        'http' => array('timeout' => 10, 'max_redirects' => 5),
    ));

    // "@" is deliberate: a missing robots.txt is an expected outcome that is reported
    // to the user through the null return value, not through a PHP warning.
    $handle = @fopen($robotFileLocation, 'r', false, $context);
    if ($handle === false) {
        return null;
    }

    // Cap the read so an oversized response cannot exhaust memory.
    $content = stream_get_contents($handle, 512000);
    fclose($handle);

    return $content === false ? null : $content;
}

/**
 * Split one robots.txt line into a directive name and its value.
 *
 * Everything from the first "#" on is treated as a comment. The directive is the
 * text before the first ":" (trimmed and lower-cased); the value is the trimmed rest.
 *
 * @param string $line One line of robots.txt, with or without a trailing line break.
 * @return array|null array(directive, value), or null when the line holds no directive.
 */
function parseRobotsLine($line) {
    $commentStart = strpos($line, '#');
    if ($commentStart !== false) {
        $line = substr($line, 0, $commentStart);
    }

    $separator = strpos($line, ':');
    if ($separator === false) {
        return null;
    }

    $directive = strtolower(trim(substr($line, 0, $separator)));
    if ($directive === '') {
        return null;
    }

    return array($directive, trim(substr($line, $separator + 1)));
}

/**
 * Escape text fetched from a remote site before it is written into the HTML page.
 *
 * @param string $text Raw text.
 * @return string Text that is safe to place inside an HTML element or attribute.
 */
function escapeHtml($text) {
    return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}
?>

1 comment: