Home Tools

Built a robots.txt tester tool

When a search engine robot visits a website, it first tries to fetch the site's robots.txt file to learn which URLs it is allowed to crawl and which it is not.
robot_txt_checker.php
<!DOCTYPE html>
<html>
<head>
	<meta charset="utf-8"/>
	<title>robots.txt tester tool</title>
	<style>
	body {
	font-family: 'Merriweather', serif;
	font-size:16px;
	}

	h1 {
	font-size:16px;
	}

	th {
	font-size:15px;
	}

	table, td, th {
	border: 1px solid #ddd;
	text-align: left;
	}

	table {
	border-collapse: collapse;
	width: 100%;
	}

	th, td {
	padding: 15px;
	}
	</style>
</head>
<body>
	<div><h1>robots.txt tester tool</h1></div>
	<div>
		<div>
			<form action="robot_txt_checker.php" method="post">
			<input type="text" name="url" placeholder="https://www.algoberry.com" value=""/>
			<input type="submit" name="submit" value="Submit"/>
			</form>
		</div>
		<div style="margin-top:10px;">
		<?php
		/*
		 * Form handler: fetch <scheme>://<host>[:port]/robots.txt for the submitted
		 * URL, parse it line by line and collect the rules per user agent.
		 *
		 * The four arrays below and $wordBuffer are globals shared with update()
		 * (defined at the end of this file): update() reads the current value from
		 * $wordBuffer and appends it to $allow / $disallow / $siteMap at the index
		 * of the user agent in $userAgent.
		 */
		$userAgent = array();
		$allow = array();
		$disallow = array();
		$siteMap = array();
		$wordBuffer = "";

		if(isset($_POST["submit"]) && isset($_POST["url"])) {
			// A non-string value (e.g. url[]=x) is treated as an invalid URL.
			$url = is_string($_POST["url"]) ? trim($_POST["url"]) : "";
			$dump = (filter_var($url, FILTER_VALIDATE_URL) !== false) ? parse_url($url) : false;
			$scheme = (is_array($dump) && isset($dump["scheme"])) ? strtolower($dump["scheme"]) : "";

			// Only web URLs make sense here; FILTER_VALIDATE_URL alone also accepts
			// other schemes (ftp:, file:, ...) and URLs without a host.
			if(($scheme === "http" || $scheme === "https") && !empty($dump["host"])) {
				$robotFileLocation = $scheme."://".$dump["host"];
				if(isset($dump["port"])) {
					// Keep a non-default port so the right server is queried.
					$robotFileLocation .= ":".$dump["port"];
				}
				$robotFileLocation .= "/robots.txt";

				// NOTE(review): this is a server-side request to a user-chosen host.
				// If the tool is publicly reachable, consider blocking private/internal
				// addresses to avoid SSRF.
				$context = stream_context_create(array("http" => array("timeout" => 10)));
				// "@" suppresses the warning fopen() raises when robots.txt is missing
				// or the host is unreachable; the failure is reported to the user below.
				$robotFileObject = @fopen($robotFileLocation, "r", false, $context);
				if($robotFileObject !== false) {
					// Cap the download (500 KiB) so a huge response cannot exhaust memory.
					$content = (string)stream_get_contents($robotFileObject, 512000);
					fclose($robotFileObject);

					// Lower-cased field name => keyword understood by update().
					$directives = array(
						"disallow" => "Disallow:",
						"allow" => "Allow:",
						"sitemap" => "Sitemap:",
					);
					$key = null;	// index in $userAgent of the group being read; null before the first User-agent line
					$lines = explode("\n", str_replace(array("\r\n", "\r"), "\n", $content));
					foreach($lines as $line) {
						// Drop "# ..." comments.
						$commentStart = strpos($line, "#");
						if($commentStart !== false) {
							$line = substr($line, 0, $commentStart);
						}
						// Split "field: value" on the first colon only, so values that
						// contain colons (sitemap URLs) stay intact.
						$separator = strpos($line, ":");
						if($separator === false) {
							continue;
						}
						$field = strtolower(trim(substr($line, 0, $separator)));
						$wordBuffer = trim(substr($line, $separator + 1));
						if($wordBuffer === "") {
							continue;	// empty values are not listed
						}

						if($field === "user-agent") {
							// Strict search: a hit at index 0 must not be mistaken for "not found".
							$key = array_search($wordBuffer, $userAgent, true);
							if($key === false) {
								$userAgent[] = $wordBuffer;
								$key = count($userAgent) - 1;
							}
						}
						else if($key !== null && isset($directives[$field])) {
							// Rules seen before any User-agent line have no row to go in and are skipped.
							update($directives[$field], $key);
						}
					}
				}
				else
				{ echo "<div style='color:red;'>robots.txt file not found</div>"; }
			}
			else
			{ echo "<div style='color:red;'>Please enter valid URL</div>"; }
		}

		$userAgentTotal = count($userAgent);
		if($userAgentTotal >= 1) {
			?>
			<table width="100%">
				<tr>
					<th valign='top'>S.No</th>
					<th valign='top'>User Agent</th>
					<th valign='top'>Allow Path</th>
					<th valign='top'>Disallow Path</th>
					<th valign='top'>Sitemap</th>
				</tr>
				<?php
				// Everything printed here comes from a remote file, so it is HTML-escaped.
				$escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;
				for($userAgentCounter = 0; $userAgentCounter < $userAgentTotal; $userAgentCounter++) {
					echo "<tr>";
					echo "<td valign='top'>".($userAgentCounter + 1)."</td>";
					echo "<td valign='top'>".htmlspecialchars($userAgent[$userAgentCounter], $escapeFlags, "UTF-8")."</td>";
					// One cell each for Allow, Disallow and Sitemap, in header order.
					foreach(array($allow, $disallow, $siteMap) as $column) {
						// A user agent may have no entries of a given kind.
						$entries = isset($column[$userAgentCounter]) ? $column[$userAgentCounter] : array();
						echo "<td valign='top'>";
						foreach($entries as $entry) {
							echo htmlspecialchars($entry, $escapeFlags, "UTF-8")."<br/>";
						}
						echo "</td>";
					}
					echo "</tr>";
				}
				?>
			</table>
			<?php
		}
		?>
		</div>
	</div>
	<div>
	<p>Created by <a href = "https://www.algoberry.com" target="_blank">Algoberry</a></p>
	</div>
</body>
</html>
160
<?php
/**
 * Store the current directive value for one user agent.
 *
 * The value is taken from the global $wordBuffer and appended to the global
 * $disallow, $allow or $siteMap array under index $key, depending on
 * $robotKeyword. Any other keyword is ignored.
 *
 * @param string   $robotKeyword Exactly "Disallow:", "Allow:" or "Sitemap:" (with the colon).
 * @param int|null $key          Index of the user agent the value belongs to;
 *                               null (no user agent seen yet) stores nothing.
 * @return void
 */
function update($robotKeyword, $key) {
	global $disallow, $allow, $siteMap, $wordBuffer;

	// Without a user agent there is no row to attach the value to; writing to
	// $array[null] would silently create a bogus "" bucket.
	if($key === null) {
		return;
	}

	if($robotKeyword === "Disallow:")
	{	$disallow[$key][] = $wordBuffer;	}
	else if($robotKeyword === "Allow:")
	{	$allow[$key][] = $wordBuffer;	}
	else if($robotKeyword === "Sitemap:")
	{	$siteMap[$key][] = $wordBuffer;	}
}
?>