// Editor/Mcp/Docs/DocCrawler.cs
using Sandbox;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
namespace SboxMcp.Mcp.Docs;
/// <summary>
/// Counters describing the outcome of a documentation crawl.
/// </summary>
public sealed class CrawlStats
{
/// <summary>Pages fetched from the API and written to the cache during this crawl.</summary>
public int Crawled { get; set; }
/// <summary>Pages whose fetch returned nothing, or too little text to keep.</summary>
public int Failed { get; set; }
/// <summary>Pages that were already in the cache and were not fetched again.</summary>
public int FromCache { get; set; }
/// <summary>
/// Number of pages considered: the document tree size, or the cached page count when the
/// whole cache was still fresh. Left at 0 when the document tree could not be loaded.
/// </summary>
public int Total { get; set; }
}
public sealed class DocCrawler
{
// Base URL that every API endpoint name is appended to.
private const string OutlineApi = "https://docs.facepunch.com/api";
// Public share identifier sent to the shares.info endpoint.
private const string ShareId = "sbox-dev";
// Prefix joined with a tree node's url to form the page URL used as the cache key.
private const string DocsBase = "https://docs.facepunch.com/s/sbox-dev";
// Pause after each document fetch during a full crawl, in milliseconds.
private const int RequestDelayMs = 100;
// Upper bound on a single API request (send plus body read).
private static readonly TimeSpan RequestTimeout = TimeSpan.FromSeconds( 15 );
// Destination for crawled pages.
private readonly DocCache _cache;
// Client used for all API requests; supplied by the caller and never disposed here.
private readonly HttpClient _http;
// Id of the first share returned by shares.info; passed as shareId to documents.info.
private string _shareUuid;
// Flattened document tree; empty until LoadTreeAsync succeeds.
private List<TreeEntry> _docTree = new();
/// <summary>
/// Creates a crawler that stores fetched pages in <paramref name="cache"/> and sends its
/// API requests through <paramref name="http"/>.
/// </summary>
/// <param name="cache">Cache that receives crawled pages.</param>
/// <param name="http">HTTP client used for every API request; the crawler never disposes it.</param>
/// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
public DocCrawler( DocCache cache, HttpClient http )
{
    // Fail fast: a null client would otherwise only show up later as an exception caught
    // inside ApiPostAsync and reported as an ordinary request failure.
    _cache = cache ?? throw new ArgumentNullException( nameof( cache ) );
    _http = http ?? throw new ArgumentNullException( nameof( http ) );
}
/// <summary>
/// One document from the share tree after flattening.
/// </summary>
private sealed class TreeEntry
{
/// <summary>Document id, passed to documents.info when fetching the page.</summary>
public string Id { get; set; }
/// <summary>Title of the tree node; used when the fetched document has no title.</summary>
public string Title { get; set; }
/// <summary>Titles from the root down to this node, joined with '/'.</summary>
public string Path { get; set; }
/// <summary>The node's url value; appended to DocsBase to build the page URL.</summary>
public string Url { get; set; }
}
/// <summary>
/// JSON shape of a node in the <c>sharedTree</c> of a shares.info response.
/// </summary>
private sealed class TreeNode
{
[JsonPropertyName( "id" )] public string Id { get; set; } = "";
[JsonPropertyName( "url" )] public string Url { get; set; } = "";
[JsonPropertyName( "title" )] public string Title { get; set; } = "";
// No default here, so this is null when the payload has no "children" property.
[JsonPropertyName( "children" )] public List<TreeNode> Children { get; set; }
}
/// <summary>
/// JSON shape of the <c>data</c> element of a shares.info response.
/// </summary>
private sealed class ShareInfoData
{
// Only the first entry is used: its id becomes the shareId for document fetches.
[JsonPropertyName( "shares" )] public List<ShareEntry> Shares { get; set; }
// Root of the document tree that gets flattened into TreeEntry items.
[JsonPropertyName( "sharedTree" )] public TreeNode SharedTree { get; set; }
}
/// <summary>
/// JSON shape of one element of the <c>shares</c> array; only its id is read.
/// </summary>
private sealed class ShareEntry
{
[JsonPropertyName( "id" )] public string Id { get; set; } = "";
}
/// <summary>
/// JSON shape of the <c>data</c> element of a documents.info response.
/// </summary>
private sealed class DocumentData
{
[JsonPropertyName( "title" )] public string Title { get; set; } = "";
// Document body; stored as the cached page's Markdown.
[JsonPropertyName( "text" )] public string Text { get; set; } = "";
// Kept as the raw string from the response and copied to the cached page's LastUpdated.
[JsonPropertyName( "updatedAt" )] public string UpdatedAt { get; set; }
}
/// <summary>
/// POSTs <paramref name="body"/> as JSON to <c>{OutlineApi}/{endpoint}</c> and returns the
/// <c>data</c> property of the JSON response.
/// </summary>
/// <param name="endpoint">Endpoint name appended to the API base URL, e.g. <c>shares.info</c>.</param>
/// <param name="body">Object serialized with <see cref="JsonSerializer"/> as the request body.</param>
/// <param name="ct">Caller's cancellation token; linked with the per-request timeout.</param>
/// <returns>
/// A copy of the <c>data</c> element that outlives the parsed document, or <c>null</c> when the
/// request times out, returns a non-success status, has no <c>data</c> property, or fails
/// for any other reason.
/// </returns>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled by the caller.</exception>
private async Task<JsonElement?> ApiPostAsync( string endpoint, object body, CancellationToken ct )
{
    try
    {
        // The linked source fires on caller cancellation or after RequestTimeout, whichever is first.
        using var cts = CancellationTokenSource.CreateLinkedTokenSource( ct );
        cts.CancelAfter( RequestTimeout );

        using var content = new StringContent(
            JsonSerializer.Serialize( body ), Encoding.UTF8, "application/json" );
        using var response = await _http.PostAsync( $"{OutlineApi}/{endpoint}", content, cts.Token );
        if ( !response.IsSuccessStatusCode ) return null;

        var raw = await response.Content.ReadAsStringAsync( cts.Token );
        using var doc = JsonDocument.Parse( raw );
        if ( !doc.RootElement.TryGetProperty( "data", out var data ) ) return null;

        // Clone so the element stays valid after the JsonDocument is disposed.
        return data.Clone();
    }
    catch ( OperationCanceledException ) when ( ct.IsCancellationRequested )
    {
        // The caller asked to stop: propagate instead of reporting a failed request.
        // A timeout leaves ct untouched and falls through to the handler below.
        throw;
    }
    catch ( Exception ex )
    {
        // Best effort: network, timeout and parse errors all become a null result,
        // but the cause is logged so failures can be diagnosed.
        Log.Warning( $"[MCP Docs] {endpoint} request failed: {ex.Message}" );
        return null;
    }
}
/// <summary>
/// Flattens <paramref name="node"/> and all of its descendants into a list, parent before
/// children, in the order the children appear.
/// </summary>
/// <param name="node">Root of the subtree to flatten.</param>
/// <param name="parentPath">Path of the node's parent; empty for the tree root.</param>
/// <returns>One entry per node, each carrying its '/'-joined title path.</returns>
private static List<TreeEntry> FlattenTree( TreeNode node, string parentPath = "" )
{
    var entries = new List<TreeEntry>();
    Collect( node, parentPath );
    return entries;

    // Appends `current`, then recurses into its children with the extended path.
    void Collect( TreeNode current, string prefix )
    {
        string path;
        if ( string.IsNullOrEmpty( prefix ) )
            path = current.Title;
        else
            path = $"{prefix}/{current.Title}";

        entries.Add( new TreeEntry
        {
            Id = current.Id,
            Title = current.Title,
            Path = path,
            Url = current.Url,
        } );

        var children = current.Children;
        if ( children is null || children.Count == 0 )
            return;

        foreach ( var child in children )
            Collect( child, path );
    }
}
/// <summary>
/// Returns the category of a document: the second non-empty segment of its '/'-separated
/// title path (the first segment is the tree root).
/// </summary>
/// <param name="path">Title path such as <c>Root/Category/Page</c>; may be null or empty.</param>
/// <returns>The second path segment, or <c>"root"</c> when the path has fewer than two segments.</returns>
internal static string ExtractCategory( string path )
{
    // A root node whose title deserialized as null produces a null path.
    if ( string.IsNullOrEmpty( path ) ) return "root";

    var parts = path.Split( '/', StringSplitOptions.RemoveEmptyEntries );
    return parts.Length >= 2 ? parts[1] : "root";
}
/// <summary>
/// Requests shares.info for the configured share and, on success, stores the share id and
/// the flattened document tree on this instance.
/// </summary>
/// <param name="ct">Cancellation token forwarded to the API request.</param>
/// <returns>
/// <c>true</c> when the tree was loaded; <c>false</c> when the request failed or the
/// response lacked a tree or a share entry. State is left untouched on failure.
/// </returns>
private async Task<bool> LoadTreeAsync( CancellationToken ct )
{
    var data = await ApiPostAsync( "shares.info", new { id = ShareId }, ct );
    if ( data is null ) return false;

    ShareInfoData typed;
    try
    {
        typed = JsonSerializer.Deserialize<ShareInfoData>( data.Value.GetRawText(), JsonOpts.Default );
    }
    catch ( JsonException ex )
    {
        // A payload with an unexpected shape is a failed load, not a crash.
        Log.Warning( $"[MCP Docs] Unexpected shares.info response: {ex.Message}" );
        return false;
    }

    if ( typed?.SharedTree is null || typed.Shares is null || typed.Shares.Count == 0 )
        return false;

    // A JSON null inside the shares array deserializes to a null entry.
    var share = typed.Shares[0];
    if ( share is null ) return false;

    // Flatten first so both fields are only replaced once everything succeeded.
    var flattened = FlattenTree( typed.SharedTree );
    _shareUuid = share.Id;
    _docTree = flattened;
    return true;
}
/// <summary>
/// Requests documents.info for <paramref name="docId"/> using the share id obtained by
/// <c>LoadTreeAsync</c>.
/// </summary>
/// <param name="docId">Id of the document to fetch.</param>
/// <param name="ct">Cancellation token forwarded to the API request.</param>
/// <returns>The deserialized document, or <c>null</c> when the request failed or the payload could not be read.</returns>
private async Task<DocumentData> FetchDocAsync( string docId, CancellationToken ct )
{
    var data = await ApiPostAsync( "documents.info",
        new { id = docId, shareId = _shareUuid }, ct );
    if ( data is null ) return null;

    try
    {
        return JsonSerializer.Deserialize<DocumentData>( data.Value.GetRawText(), JsonOpts.Default );
    }
    catch ( JsonException ex )
    {
        // One malformed document must not abort the whole crawl; callers treat null as a failed page.
        Log.Warning( $"[MCP Docs] Unexpected documents.info response for {docId}: {ex.Message}" );
        return null;
    }
}
/// <summary>
/// Crawls every document in the share tree into the cache, skipping pages the cache
/// already considers fresh, then prunes cached pages that are no longer in the tree.
/// </summary>
/// <param name="onProgress">Optional callback invoked with the running stats after each document.</param>
/// <param name="ct">Cancels the crawl between and during requests.</param>
/// <returns>
/// Crawl counters. When the whole cache is fresh, or the tree cannot be loaded, no pages
/// are fetched and <c>FromCache</c> reports the cached page count.
/// </returns>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled during the crawl.</exception>
public async Task<CrawlStats> CrawlAllAsync( Action<CrawlStats> onProgress, CancellationToken ct )
{
    // Documents with less text than this are treated as empty and counted as failed.
    const int MinTextLength = 10;

    if ( _cache.IsFresh() )
    {
        var count = _cache.GetPageCount();
        return new CrawlStats { FromCache = count, Total = count };
    }

    var stats = new CrawlStats();
    if ( !await LoadTreeAsync( ct ) )
    {
        Log.Warning( "[MCP Docs] Could not load document tree from docs.facepunch.com" );
        stats.FromCache = _cache.GetPageCount();
        return stats;
    }

    // Work on one snapshot of the tree for the loop and the pruning step.
    var tree = _docTree;
    stats.Total = tree.Count;
    Log.Info( $"[MCP Docs] Found {tree.Count} docs in tree" );

    var validUrls = new HashSet<string>();
    try
    {
        foreach ( var doc in tree )
        {
            ct.ThrowIfCancellationRequested();

            var fullUrl = $"{DocsBase}{doc.Url}";
            validUrls.Add( fullUrl );

            if ( _cache.IsPageFresh( fullUrl ) )
            {
                stats.FromCache++;
                onProgress?.Invoke( stats );
                continue;
            }

            var fetched = await FetchDocAsync( doc.Id, ct );
            if ( fetched?.Text is null || fetched.Text.Length < MinTextLength )
            {
                stats.Failed++;
            }
            else
            {
                _cache.SetPage( new CachedPage
                {
                    Url = fullUrl,
                    Title = string.IsNullOrEmpty( fetched.Title ) ? doc.Title : fetched.Title,
                    Category = ExtractCategory( doc.Path ),
                    Markdown = fetched.Text,
                    FetchedAt = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
                    LastUpdated = fetched.UpdatedAt,
                } );
                stats.Crawled++;
            }

            onProgress?.Invoke( stats );
            // Throttle after every network fetch, successful or not.
            await Task.Delay( RequestDelayMs, ct );
        }
    }
    catch ( OperationCanceledException )
    {
        // Keep what was fetched so a cancelled crawl does not have to refetch it.
        // NOTE(review): assumes SetPage only updates memory and Save persists it — confirm in DocCache.
        if ( stats.Crawled > 0 )
            _cache.Save();
        throw;
    }

    var pruned = _cache.RemovePagesNotIn( validUrls );
    if ( pruned > 0 )
        Log.Info( $"[MCP Docs] Pruned {pruned} stale page(s) from cache" );

    _cache.MarkFullCrawl();
    _cache.Save();
    return stats;
}
/// <summary>
/// Returns the cached page for <paramref name="url"/> when it is still fresh; otherwise
/// fetches that single document, stores it in the cache and saves the cache.
/// </summary>
/// <param name="url">Page URL, either prefixed with the docs base URL or given as the tree-relative url.</param>
/// <param name="ct">Cancellation token forwarded to the API requests.</param>
/// <returns>
/// The cached or freshly fetched page, or <c>null</c> when <paramref name="url"/> is null,
/// is not part of the document tree, or the document could not be fetched.
/// </returns>
public async Task<CachedPage> CrawlSinglePageAsync( string url, CancellationToken ct )
{
    if ( url is null ) return null;

    if ( _cache.IsPageFresh( url ) )
        return _cache.GetPage( url );

    // Load the tree lazily; if loading fails it stays empty and the lookup below finds nothing.
    if ( _docTree.Count == 0 )
        await LoadTreeAsync( ct );

    // Ordinal comparison: a culture-aware prefix match could disagree with the
    // character count that Substring strips.
    var urlPath = url.StartsWith( DocsBase, StringComparison.Ordinal )
        ? url.Substring( DocsBase.Length )
        : url;

    var entry = _docTree.FirstOrDefault( d => d.Url == urlPath );
    if ( entry is null ) return null;

    var fetched = await FetchDocAsync( entry.Id, ct );
    if ( fetched is null || string.IsNullOrEmpty( fetched.Text ) ) return null;

    // NOTE(review): the page is keyed by the url exactly as passed in, while CrawlAllAsync
    // always keys by the full docs URL — confirm callers only pass full URLs.
    var page = new CachedPage
    {
        Url = url,
        Title = string.IsNullOrEmpty( fetched.Title ) ? entry.Title : fetched.Title,
        Category = ExtractCategory( entry.Path ),
        Markdown = fetched.Text,
        FetchedAt = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
        LastUpdated = fetched.UpdatedAt,
    };
    _cache.SetPage( page );
    _cache.Save();
    return page;
}
}