try to fix word of the day

This commit is contained in:
Jacob Dubin
2026-04-18 16:43:38 -05:00
parent 83a9350a9d
commit 6d457fe1c0
11 changed files with 825 additions and 3 deletions

View File

@@ -83,6 +83,13 @@ Evidence from the latest `2026-04-18` captures:
- some phrases were recognized but still produced placeholder provider replies, because the feature path behind the recognized intent is still a stub
- short yes/no responses need the same session-aware treatment already prototyped in Node, especially for create-flow style follow-ups
Evidence from the latest word-of-the-day capture round:
- yes/no photo confirmation improved and now completes through the constrained follow-up path
- `CLIENT_NLU` menu navigation is surfacing richer `destination` entities such as `snapshot`, `fun`, and `word-of-the-day`
- word-of-the-day guesses can arrive as structured `CLIENT_NLU` turns with `intent=guess`, `rules=["word-of-the-day/puzzle"]`, and `entities.guess=<word>`
- those structured turns should be treated as first-class cloud inputs even when no free-form transcript is present
Near-term interaction work should now prioritize:
1. preserve and interpret yes/no turn constraints from observed listen rules
@@ -90,6 +97,17 @@ Near-term interaction work should now prioritize:
3. keep synthetic transcript hints as the most reliable parity path when captures already provide them
4. continue evaluating whether local preprocessing is worth further investment or whether managed STT should replace it for the next serious testing phase
## Capture Storage Direction
Repo-local NDJSON plus zipped capture bundles are still good enough for current reverse-engineering and single-operator testing.
For hosted group testing, the next direction should be:
1. keep local file sinks for dev and laptop workflows
2. add a cleaner export/archive boundary so noteworthy sessions can be promoted without copying raw capture trees around manually
3. plan for hosted durable storage separately from the runtime node that is serving live robot traffic
4. keep fixture generation and sanitized replay artifacts as the stable handoff format between local testing and hosted debugging
## Working Cloud Framework
The current evidence in captures, fixtures, and Node behavior supports three main cloud interaction paths:

View File

@@ -110,6 +110,7 @@ Current raw-audio behavior is still a compatibility bridge:
- this is intentionally not a claim of real ASR parity
- follow-up turns now preserve enough constraint state to distinguish yes/no-style replies from ordinary free-form chat
- create-flow yes/no turns now preserve `create/is_it_a_keeper` and `domain=create` in the outbound synthetic `LISTEN` payload
- structured word-of-the-day guesses now complete as `CLIENT_NLU` turns instead of falling back to pending/blank-audio behavior
- phrase matching has been widened slightly for known test prompts such as joke, dance, surprise, weather, calendar, commute, and news variants
## Buffered Audio STT
@@ -148,6 +149,12 @@ Latest live-capture guidance after the `2026-04-18` round:
- treat `ffmpeg` decode failures on normalized Ogg captures as evidence that the local audio path still needs more hardening before it can be the default live-test expectation
- keep the Node implementation as the oracle for yes/no turn semantics and audio preprocessing details until the `.NET` port catches up
Capture-storage guidance while moving toward hosted group testing:
- repo-local file captures remain the default for laptop-based reverse engineering
- hosted deployments should keep runtime request handling decoupled from long-term capture retention
- sanitized fixtures remain the preferred durable artifact for parity work and bug reproduction
## Current Interaction Paths
The working cloud model currently looks like three main paths:

View File

@@ -16,9 +16,11 @@ public sealed class JiboInteractionService(
var clientIntent = turn.Attributes.TryGetValue("clientIntent", out var rawClientIntent)
? rawClientIntent?.ToString()
: null;
var clientRules = ReadRules(turn, "clientRules").ToArray();
var clientEntities = ReadEntities(turn);
var isYesNoTurn = IsYesNoTurn(turn);
var semanticIntent = ResolveSemanticIntent(lowered, clientIntent, isYesNoTurn);
var semanticIntent = ResolveSemanticIntent(lowered, clientIntent, clientRules, clientEntities, isYesNoTurn);
return semanticIntent switch
{
"joke" => BuildJokeDecision(catalog),
@@ -29,6 +31,8 @@ public sealed class JiboInteractionService(
"how_are_you" => new JiboInteractionDecision("how_are_you", randomizer.Choose(catalog.HowAreYouReplies)),
"yes" => new JiboInteractionDecision("yes", "Yes."),
"no" => new JiboInteractionDecision("no", "No."),
"word_of_the_day" => new JiboInteractionDecision("word_of_the_day", "Word of the day is ready."),
"word_of_the_day_guess" => BuildWordOfTheDayGuessDecision(clientEntities),
"surprise" => new JiboInteractionDecision("surprise", randomizer.Choose(catalog.SurpriseReplies)),
"personal_report" => new JiboInteractionDecision("personal_report", randomizer.Choose(catalog.PersonalReportReplies)),
"weather" => new JiboInteractionDecision("weather", randomizer.Choose(catalog.WeatherReplies)),
@@ -90,8 +94,26 @@ public sealed class JiboInteractionService(
.Replace("{transcript}", transcript, StringComparison.Ordinal);
}
private static string ResolveSemanticIntent(string loweredTranscript, string? clientIntent, bool isYesNoTurn)
private static string ResolveSemanticIntent(
string loweredTranscript,
string? clientIntent,
IReadOnlyList<string> clientRules,
IReadOnlyDictionary<string, string> clientEntities,
bool isYesNoTurn)
{
if (string.Equals(clientIntent, "guess", StringComparison.OrdinalIgnoreCase) &&
clientRules.Any(rule => string.Equals(rule, "word-of-the-day/puzzle", StringComparison.OrdinalIgnoreCase)))
{
return "word_of_the_day_guess";
}
if (string.Equals(clientIntent, "loadMenu", StringComparison.OrdinalIgnoreCase) &&
clientEntities.TryGetValue("destination", out var destination) &&
string.Equals(destination, "word-of-the-day", StringComparison.OrdinalIgnoreCase))
{
return "word_of_the_day";
}
if (string.Equals(clientIntent, "askForTime", StringComparison.OrdinalIgnoreCase))
{
return "time";
@@ -178,6 +200,19 @@ public sealed class JiboInteractionService(
return "chat";
}
/// <summary>
/// Builds the decision for a word-of-the-day guess turn from the client-supplied entities.
/// </summary>
/// <param name="clientEntities">Entities attached to the turn; only the <c>guess</c> entry is read.</param>
/// <returns>
/// A <c>word_of_the_day_guess</c> decision whose reply echoes the guessed word, or a generic
/// acknowledgement when the guess is missing or blank.
/// </returns>
private static JiboInteractionDecision BuildWordOfTheDayGuessDecision(IReadOnlyDictionary<string, string> clientEntities)
{
    const string IntentName = "word_of_the_day_guess";

    // Echo the guess only when the entity is present and carries visible text.
    if (clientEntities.TryGetValue("guess", out var heardWord) && !string.IsNullOrWhiteSpace(heardWord))
    {
        return new JiboInteractionDecision(IntentName, $"I heard {heardWord}.");
    }

    return new JiboInteractionDecision(IntentName, "I heard your word of the day guess.");
}
private static bool IsYesNoTurn(TurnContext turn)
{
return ReadRules(turn, "listenRules").Concat(ReadRules(turn, "clientRules"))
@@ -204,6 +239,26 @@ public sealed class JiboInteractionService(
};
}
/// <summary>
/// Reads the <c>clientEntities</c> turn attribute as a string-to-string map.
/// </summary>
/// <param name="turn">The turn whose attributes are inspected.</param>
/// <returns>
/// The entities found on the turn. A JSON object contributes only its string-valued properties,
/// a loosely typed dictionary contributes the <c>ToString()</c> of each non-null value, and an
/// already typed map is returned as provided (with whatever key comparison it was built with).
/// A missing, null, or unrecognized attribute yields an empty case-insensitive map.
/// </returns>
private static IReadOnlyDictionary<string, string> ReadEntities(TurnContext turn)
{
    var entities = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

    if (!turn.Attributes.TryGetValue("clientEntities", out var value) || value is null)
    {
        return entities;
    }

    switch (value)
    {
        case JsonElement { ValueKind: JsonValueKind.Object } json:
            foreach (var property in json.EnumerateObject())
            {
                if (property.Value.ValueKind != JsonValueKind.String)
                {
                    continue;
                }

                // Indexer assignment (last value wins) instead of ToDictionary: a payload with
                // repeated or case-variant property names must not throw ArgumentException.
                entities[property.Name] = property.Value.GetString() ?? string.Empty;
            }

            return entities;

        case IReadOnlyDictionary<string, string> typed:
            return typed;

        case IDictionary<string, object?> dictionary:
            foreach (var pair in dictionary)
            {
                if (pair.Value is null)
                {
                    continue;
                }

                // Same last-wins rule: keys differing only by case collapse into one entry.
                entities[pair.Key] = pair.Value.ToString() ?? string.Empty;
            }

            return entities;

        default:
            return entities;
    }
}
private static bool MatchesAny(string loweredTranscript, params string[] candidates)
{
return candidates.Any(candidate => loweredTranscript.Contains(candidate, StringComparison.Ordinal));

View File

@@ -24,7 +24,10 @@ public sealed class ResponsePlanToSocketMessagesMapper
var outboundIntent = string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(clientIntent)
? clientIntent
: plan.IntentName ?? "unknown";
var outboundAsrText = isYesNoTurn && isYesNoIntent
var nluGuess = ReadClientEntity(turn, "guess");
var outboundAsrText = string.Equals(clientIntent, "guess", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(nluGuess)
? nluGuess
: isYesNoTurn && isYesNoIntent
? transcript
: string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(clientIntent)
? clientIntent
@@ -206,6 +209,26 @@ public sealed class ResponsePlanToSocketMessagesMapper
: null;
}
/// <summary>
/// Looks up a single named entity in the turn's <c>clientEntities</c> attribute.
/// </summary>
/// <param name="turn">The turn whose attributes are inspected.</param>
/// <param name="entityName">Name of the entity to read.</param>
/// <returns>
/// The entity value when the attribute is a JSON object with a string property of that name,
/// a typed string map containing the key, or a loosely typed dictionary containing the key
/// (converted with <c>ToString()</c>); otherwise <c>null</c>.
/// </returns>
private static string? ReadClientEntity(TurnContext turn, string entityName)
{
    if (!turn.Attributes.TryGetValue("clientEntities", out var rawEntities) || rawEntities is null)
    {
        return null;
    }

    // Each shape is tried in turn; a shape that matches but lacks the entity falls through.
    if (rawEntities is JsonElement { ValueKind: JsonValueKind.Object } json
        && json.TryGetProperty(entityName, out var property)
        && property.ValueKind == JsonValueKind.String)
    {
        return property.GetString();
    }

    if (rawEntities is IReadOnlyDictionary<string, string> typed
        && typed.TryGetValue(entityName, out var typedValue))
    {
        return typedValue;
    }

    if (rawEntities is IDictionary<string, object?> loose
        && loose.TryGetValue(entityName, out var looseValue))
    {
        return looseValue?.ToString();
    }

    return null;
}
private static object BuildSkillPayload(ResponsePlan plan, TurnContext turn, string transId, SpeakAction speak, InvokeNativeSkillAction? skill)
{
var skillPayload = skill?.Payload;

View File

@@ -490,7 +490,16 @@ public sealed class WebSocketTurnFinalizationService(
private static bool IsTranscriptUsable(TurnContext turn)
{
var messageType = ReadMessageType(turn);
var clientIntent = ReadAttribute(turn, "clientIntent");
var transcript = NormalizeTranscript(turn.NormalizedTranscript ?? turn.RawTranscript);
if (string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) &&
!string.IsNullOrWhiteSpace(clientIntent))
{
return true;
}
if (string.IsNullOrWhiteSpace(transcript))
{
return false;
@@ -546,4 +555,16 @@ public sealed class WebSocketTurnFinalizationService(
.Replace(" ", " ", StringComparison.Ordinal)
.Trim();
}
/// <summary>
/// Reads the <c>messageType</c> attribute of the turn as a string.
/// </summary>
/// <param name="turn">The turn whose attributes are inspected.</param>
/// <returns>The attribute's string form, or <c>null</c> when it is absent or null.</returns>
private static string? ReadMessageType(TurnContext turn) => ReadAttribute(turn, "messageType");
/// <summary>
/// Reads a turn attribute and converts it to a string.
/// </summary>
/// <param name="turn">The turn whose attributes are inspected.</param>
/// <param name="key">The attribute key to look up.</param>
/// <returns>
/// The <c>ToString()</c> of the stored value, or <c>null</c> when the key is absent or the
/// stored value is null.
/// </returns>
private static string? ReadAttribute(TurnContext turn, string key)
{
    if (turn.Attributes.TryGetValue(key, out var stored))
    {
        return stored?.ToString();
    }

    return null;
}
}

View File

@@ -1,6 +1,7 @@
using Jibo.Cloud.Application.Services;
using Jibo.Cloud.Infrastructure.Content;
using Jibo.Runtime.Abstractions;
using System.Text.Json;
namespace Jibo.Cloud.Tests.WebSockets;
@@ -89,6 +90,27 @@ public sealed class JiboInteractionServiceTests
Assert.Equal("joke", decision.IntentName);
}
// Verifies that a turn carrying clientIntent "guess", the word-of-the-day puzzle rule, and a
// JSON "guess" entity resolves to the word_of_the_day_guess intent and echoes the guessed word.
[Fact]
public async Task BuildDecisionAsync_WordOfDayGuess_UsesStructuredClientNluGuess()
{
    // Arrange
    var service = CreateService();

    // JsonDocument rents pooled memory, so dispose it; Clone() yields an element that can be
    // stored beyond the lifetime of the document.
    using var entitiesDocument = JsonDocument.Parse("""{"guess":"pastoral"}""");
    var turn = new TurnContext
    {
        RawTranscript = "guess",
        NormalizedTranscript = "guess",
        Attributes = new Dictionary<string, object?>
        {
            ["clientIntent"] = "guess",
            ["clientRules"] = new[] { "word-of-the-day/puzzle" },
            ["clientEntities"] = entitiesDocument.RootElement.Clone()
        }
    };

    // Act
    var decision = await service.BuildDecisionAsync(turn);

    // Assert
    Assert.Equal("word_of_the_day_guess", decision.IntentName);
    Assert.Equal("I heard pastoral.", decision.ReplyText);
}
private static JiboInteractionService CreateService()
{
return new JiboInteractionService(

View File

@@ -373,6 +373,38 @@ public sealed class JiboWebSocketServiceTests
Assert.Equal("create/is_it_a_keeper", listenPayload.RootElement.GetProperty("data").GetProperty("match").GetProperty("rule").GetString());
}
// Sends a LISTEN message followed by a CLIENT_NLU guess on the same token and transID, then
// checks that the replies are LISTEN + EOS and that the LISTEN payload carries the guessed word.
[Fact]
public async Task ClientNlu_WordOfDayGuess_UsesGuessEntityAsAsrTextAndCompletesTurn()
{
    // First message: LISTEN carrying the puzzle rule and the ASR hints.
    var listenEnvelope = new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-wod-guess-token",
        Text = """{"type":"LISTEN","transID":"trans-wod-guess","data":{"rules":["word-of-the-day/puzzle","globals/gui_nav"],"asr":{"hints":["pastoral","doodad","escarpment"],"earlyEOS":["pastoral","doodad","escarpment"]}}}"""
    };
    await _service.HandleMessageAsync(listenEnvelope);

    // Second message: structured CLIENT_NLU guess for the same transaction.
    var guessEnvelope = new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-wod-guess-token",
        Text = """{"type":"CLIENT_NLU","transID":"trans-wod-guess","data":{"entities":{"guess":"pastoral"},"intent":"guess","rules":["word-of-the-day/puzzle"]}}"""
    };
    var replies = await _service.HandleMessageAsync(guessEnvelope);

    Assert.Equal(2, replies.Count);
    Assert.Equal("LISTEN", ReadReplyType(replies[0]));
    Assert.Equal("EOS", ReadReplyType(replies[1]));

    using var listenPayload = JsonDocument.Parse(replies[0].Text!);
    var data = listenPayload.RootElement.GetProperty("data");
    Assert.Equal("pastoral", data.GetProperty("asr").GetProperty("text").GetString());

    var nlu = data.GetProperty("nlu");
    Assert.Equal("guess", nlu.GetProperty("intent").GetString());
    Assert.Equal("pastoral", nlu.GetProperty("entities").GetProperty("guess").GetString());

    Assert.Equal("word-of-the-day/puzzle", data.GetProperty("match").GetProperty("rule").GetString());
}
[Fact]
public async Task BufferedAudio_WithSyntheticTranscriptHint_FinalizesThroughSttSeam()
{