Built a robots.txt tester tool
When a search engine robot visits a website, it first tries to access the site's robots.txt file to learn which URLs on the site it is allowed to crawl and which it is not.

robot_txt_checker.php
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<title>robots.txt tester tool</title>
<style>
/* Base typography for the whole page. */
body {
    font-family: 'Merriweather', serif;
    font-size: 16px;
}

h1 {
    font-size: 16px;
}

th {
    font-size: 15px;
}

/* Result table: thin borders, full width, left-aligned padded cells. */
table, td, th {
    border: 1px solid #ddd;
    text-align: left;
}

table {
    border-collapse: collapse;
    width: 100%;
}

th, td {
    padding: 15px;
}
</style>
</head>
<body>
<div><h1>robots.txt tester tool</h1></div>
<div>
<div>
<!-- Posts the site URL back to this script; the PHP section below reads $_POST["url"] and $_POST["submit"]. -->
<form action="robot_txt_checker.php" method="post">
<input type="text" name="url" placeholder="https://www.algoberry.com" value=""/>
<input type="submit" name="submit" value="Submit"/>
</form>
</div>
<div style="margin-top:10px;">
<?php
// Parsed robots.txt data.
//   $userAgent - list of distinct user-agent names, in order of first appearance.
//   $allow / $disallow / $siteMap - lists of values keyed by the agent's index in $userAgent.
// All of these are globals because update() writes to them via `global`.
$userAgent = array();
$allow = array();
$disallow = array();
$siteMap = array();

if (isset($_POST["submit"]) && isset($_POST["url"])) {
    // A non-string value (e.g. url[]=x) is treated as empty so trim() never receives an array.
    $url = is_string($_POST["url"]) ? trim($_POST["url"]) : "";
    $dump = (filter_var($url, FILTER_VALIDATE_URL) !== false) ? parse_url($url) : false;
    $scheme = (is_array($dump) && isset($dump["scheme"])) ? strtolower($dump["scheme"]) : "";

    // Only http/https with a host are accepted: FILTER_VALIDATE_URL also passes
    // host-less or non-web schemes, which must not be handed to fopen().
    if (($scheme === "http" || $scheme === "https") && isset($dump["host"])) {
        // robots.txt lives at the root of the authority, so keep the port and drop the path.
        $robotFileLocation = $scheme."://".$dump["host"];
        if (isset($dump["port"])) {
            $robotFileLocation .= ":".$dump["port"];
        }
        $robotFileLocation .= "/robots.txt";

        // NOTE(review): this fetches a user-chosen host from the server; restrict
        // reachable hosts if this runs where internal addresses matter.
        // "@" hides the warning fopen() raises when robots.txt cannot be opened;
        // that case is reported to the user below instead.
        $robotFileObject = @fopen($robotFileLocation, "r");
        if ($robotFileObject !== false) {
            // Read at most 500 KiB so an oversized response cannot exhaust memory.
            $contents = stream_get_contents($robotFileObject, 512000);
            fclose($robotFileObject);
            if ($contents === false) {
                $contents = "";
            }
            // Drop a UTF-8 byte-order mark so the first directive is still recognised.
            if (strncmp($contents, "\xEF\xBB\xBF", 3) === 0) {
                $contents = substr($contents, 3);
            }

            $groupKeys = array();      // indexes (into $userAgent) of the agents the current rules apply to
            $previousWasAgent = false; // true while reading consecutive User-agent lines of one group

            foreach (preg_split('/\r\n|\r|\n/', $contents) as $line) {
                // Everything after "#" is a comment.
                $hashAt = strpos($line, "#");
                if ($hashAt !== false) {
                    $line = substr($line, 0, $hashAt);
                }

                // Split "Directive: value" on the first colon only, so URL values keep theirs.
                $parts = explode(":", $line, 2);
                if (count($parts) !== 2) {
                    continue;
                }

                // Directive names are matched case-insensitively and normalised to the
                // exact spellings update() compares against ("Disallow:", "Allow:", "Sitemap:").
                $directive = ucfirst(strtolower(trim($parts[0]))).":";
                $value = trim($parts[1]);

                if ($directive === "User-agent:") {
                    if ($value === "") {
                        continue;
                    }
                    // Strict search: a hit at index 0 must not be mistaken for "not found".
                    $key = array_search($value, $userAgent, true);
                    if ($key === false) {
                        $userAgent[] = $value;
                        $key = count($userAgent) - 1;
                    }
                    // Consecutive User-agent lines share the rules that follow them;
                    // a User-agent line after a rule starts a new group.
                    if (!$previousWasAgent) {
                        $groupKeys = array();
                    }
                    if (!in_array($key, $groupKeys, true)) {
                        $groupKeys[] = $key;
                    }
                    $previousWasAgent = true;
                    continue;
                }

                $isRule = ($directive === "Disallow:" || $directive === "Allow:");
                if ($isRule) {
                    $previousWasAgent = false;
                }
                if (!$isRule && $directive !== "Sitemap:") {
                    continue; // unknown directive (e.g. Crawl-delay): ignored
                }
                // Empty values carry no path, and values seen before any User-agent
                // line have no table row to be shown in.
                if ($value === "" || count($groupKeys) === 0) {
                    continue;
                }

                // update() takes the value from the global $wordBuffer, not from an argument.
                $wordBuffer = $value;
                foreach ($groupKeys as $key) {
                    update($directive, $key);
                }
            }
        }
        else {
            echo "<div style='color:red;'>robots.txt file not found</div>";
        }
    }
    else {
        echo "<div style='color:red;'>Please enter valid URL</div>";
    }
}

// Render one table row per user agent; nothing is printed when no agent was parsed.
if (count($userAgent) >= 1) {
?>
<table width="100%">
<tr>
<th valign='top'>S.No</th>
<th valign='top'>User Agent</th>
<th valign='top'>Allow Path</th>
<th valign='top'>Disallow Path</th>
<th valign='top'>Sitemap</th>
</tr>
<?php
    foreach ($userAgent as $agentIndex => $agentName) {
        echo "<tr>";
        echo "<td valign='top'>".($agentIndex + 1)."</td>";
        // All text below comes from the remote robots.txt, so it is HTML-escaped before output.
        echo "<td valign='top'>".htmlspecialchars($agentName, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8")."</td>";
        // Column order matches the header: Allow Path, Disallow Path, Sitemap.
        foreach (array($allow, $disallow, $siteMap) as $column) {
            // An agent without entries of this kind has no key in the column array.
            $entries = isset($column[$agentIndex]) ? $column[$agentIndex] : array();
            echo "<td valign='top'>";
            foreach ($entries as $entry) {
                echo htmlspecialchars($entry, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8")."<br/>";
            }
            echo "</td>";
        }
        echo "</tr>";
    }
?>
</table>
<?php
}
?>
</div>
</div>
<div>
<!-- rel="noopener noreferrer" keeps the page opened in the new tab from reaching back to this one. -->
<p>Created by <a href="https://www.algoberry.com" target="_blank" rel="noopener noreferrer">Algoberry</a></p>
</div>
</body>
</html>

<?php
/**
 * Appends the current robots.txt value to the list that matches a directive.
 *
 * The value is not passed as an argument: it is read from the global
 * $wordBuffer. It is appended to the global $disallow, $allow or $siteMap
 * array under $key, depending on $robotKeyword.
 *
 * @param string   $robotKeyword Exactly "Disallow:", "Allow:" or "Sitemap:";
 *                               any other value leaves all three arrays untouched.
 * @param int|null $key          Array key to store under (the calling script passes
 *                               the user agent's index). Null is ignored.
 * @return void
 */
function update($robotKeyword, $key) {
    global $disallow, $allow, $siteMap, $wordBuffer;

    // A directive seen before any user agent has no key; storing it would
    // create a stray "" bucket, so it is skipped.
    if ($key === null) {
        return;
    }

    if ($robotKeyword === "Disallow:") {
        $disallow[$key][] = $wordBuffer;
    }
    else if ($robotKeyword === "Allow:") {
        $allow[$key][] = $wordBuffer;
    }
    else if ($robotKeyword === "Sitemap:") {
        $siteMap[$key][] = $wordBuffer;
    }
}
?>