mirror of
https://kevinblog.sytes.net/Code/Jibo-Revival-Group/JiboExperiments.git
synced 2026-06-16 12:56:28 +00:00
try to fix word of the day
This commit is contained in:
@@ -83,6 +83,13 @@ Evidence from the latest `2026-04-18` captures:
|
||||
- some recognized phrases fell into placeholder provider replies because the intent was recognized but the feature path behind it is still a stub
|
||||
- short yes/no responses need the same session-aware treatment already prototyped in Node, especially for create-flow style follow-ups
|
||||
|
||||
Evidence from the latest word-of-the-day capture round:
|
||||
|
||||
- yes/no photo confirmation improved and now completes through the constrained follow-up path
|
||||
- `CLIENT_NLU` menu navigation is surfacing richer `destination` entities such as `snapshot`, `fun`, and `word-of-the-day`
|
||||
- word-of-the-day guesses can arrive as structured `CLIENT_NLU` turns with `intent=guess`, `rules=["word-of-the-day/puzzle"]`, and `entities.guess=<word>`
|
||||
- those structured turns should be treated as first-class cloud inputs even when no free-form transcript is present
|
||||
|
||||
Near-term interaction work should now prioritize:
|
||||
|
||||
1. preserve and interpret yes/no turn constraints from observed listen rules
|
||||
@@ -90,6 +97,17 @@ Near-term interaction work should now prioritize:
|
||||
3. keep synthetic transcript hints as the most reliable parity path when captures already provide them
|
||||
4. continue evaluating whether local preprocessing is worth further investment or whether managed STT should replace it for the next serious testing phase
|
||||
|
||||
## Capture Storage Direction
|
||||
|
||||
Repo-local NDJSON plus zipped capture bundles are still good enough for current reverse-engineering and single-operator testing.
|
||||
|
||||
For hosted group testing, the next direction should be:
|
||||
|
||||
1. keep local file sinks for dev and laptop workflows
|
||||
2. add a cleaner export/archive boundary so noteworthy sessions can be promoted without copying raw capture trees around manually
|
||||
3. plan for hosted durable storage separately from the runtime node that is serving live robot traffic
|
||||
4. keep fixture generation and sanitized replay artifacts as the stable handoff format between local testing and hosted debugging
|
||||
|
||||
## Working Cloud Framework
|
||||
|
||||
The current evidence in captures, fixtures, and Node behavior supports three main cloud interaction paths:
|
||||
|
||||
@@ -110,6 +110,7 @@ Current raw-audio behavior is still a compatibility bridge:
|
||||
- this is intentionally not a claim of real ASR parity
|
||||
- follow-up turns now preserve enough constraint state to distinguish yes/no-style replies from ordinary free-form chat
|
||||
- create-flow yes/no turns now preserve `create/is_it_a_keeper` and `domain=create` in the outbound synthetic `LISTEN` payload
|
||||
- structured word-of-the-day guesses now complete as `CLIENT_NLU` turns instead of falling back to pending/blank-audio behavior
|
||||
- phrase matching has been widened slightly for known test prompts such as joke, dance, surprise, weather, calendar, commute, and news variants
|
||||
|
||||
## Buffered Audio STT
|
||||
@@ -148,6 +149,12 @@ Latest live-capture guidance after the `2026-04-18` round:
|
||||
- treat `ffmpeg` decode failures on normalized Ogg captures as evidence that the local audio path still needs more hardening before it can be the default live-test expectation
|
||||
- keep the Node implementation as the oracle for yes/no turn semantics and audio preprocessing details until the `.NET` port catches up
|
||||
|
||||
Capture-storage guidance while moving toward hosted group testing:
|
||||
|
||||
- repo-local file captures remain the default for laptop-based reverse engineering
|
||||
- hosted deployments should keep runtime request handling decoupled from long-term capture retention
|
||||
- sanitized fixtures remain the preferred durable artifact for parity work and bug reproduction
|
||||
|
||||
## Current Interaction Paths
|
||||
|
||||
The working cloud model currently looks like three main paths:
|
||||
|
||||
@@ -16,9 +16,11 @@ public sealed class JiboInteractionService(
|
||||
var clientIntent = turn.Attributes.TryGetValue("clientIntent", out var rawClientIntent)
|
||||
? rawClientIntent?.ToString()
|
||||
: null;
|
||||
var clientRules = ReadRules(turn, "clientRules").ToArray();
|
||||
var clientEntities = ReadEntities(turn);
|
||||
var isYesNoTurn = IsYesNoTurn(turn);
|
||||
|
||||
var semanticIntent = ResolveSemanticIntent(lowered, clientIntent, isYesNoTurn);
|
||||
var semanticIntent = ResolveSemanticIntent(lowered, clientIntent, clientRules, clientEntities, isYesNoTurn);
|
||||
return semanticIntent switch
|
||||
{
|
||||
"joke" => BuildJokeDecision(catalog),
|
||||
@@ -29,6 +31,8 @@ public sealed class JiboInteractionService(
|
||||
"how_are_you" => new JiboInteractionDecision("how_are_you", randomizer.Choose(catalog.HowAreYouReplies)),
|
||||
"yes" => new JiboInteractionDecision("yes", "Yes."),
|
||||
"no" => new JiboInteractionDecision("no", "No."),
|
||||
"word_of_the_day" => new JiboInteractionDecision("word_of_the_day", "Word of the day is ready."),
|
||||
"word_of_the_day_guess" => BuildWordOfTheDayGuessDecision(clientEntities),
|
||||
"surprise" => new JiboInteractionDecision("surprise", randomizer.Choose(catalog.SurpriseReplies)),
|
||||
"personal_report" => new JiboInteractionDecision("personal_report", randomizer.Choose(catalog.PersonalReportReplies)),
|
||||
"weather" => new JiboInteractionDecision("weather", randomizer.Choose(catalog.WeatherReplies)),
|
||||
@@ -90,8 +94,26 @@ public sealed class JiboInteractionService(
|
||||
.Replace("{transcript}", transcript, StringComparison.Ordinal);
|
||||
}
|
||||
|
||||
private static string ResolveSemanticIntent(string loweredTranscript, string? clientIntent, bool isYesNoTurn)
|
||||
private static string ResolveSemanticIntent(
|
||||
string loweredTranscript,
|
||||
string? clientIntent,
|
||||
IReadOnlyList<string> clientRules,
|
||||
IReadOnlyDictionary<string, string> clientEntities,
|
||||
bool isYesNoTurn)
|
||||
{
|
||||
if (string.Equals(clientIntent, "guess", StringComparison.OrdinalIgnoreCase) &&
|
||||
clientRules.Any(rule => string.Equals(rule, "word-of-the-day/puzzle", StringComparison.OrdinalIgnoreCase)))
|
||||
{
|
||||
return "word_of_the_day_guess";
|
||||
}
|
||||
|
||||
if (string.Equals(clientIntent, "loadMenu", StringComparison.OrdinalIgnoreCase) &&
|
||||
clientEntities.TryGetValue("destination", out var destination) &&
|
||||
string.Equals(destination, "word-of-the-day", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
return "word_of_the_day";
|
||||
}
|
||||
|
||||
if (string.Equals(clientIntent, "askForTime", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
return "time";
|
||||
@@ -178,6 +200,19 @@ public sealed class JiboInteractionService(
|
||||
return "chat";
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds the decision for a structured word-of-the-day guess turn.
/// </summary>
/// <param name="clientEntities">Entity map for the turn; the <c>guess</c> entry is the only one consulted.</param>
/// <returns>
/// A <c>word_of_the_day_guess</c> decision whose reply echoes the guessed word, or a generic
/// acknowledgement when the <c>guess</c> entity is absent or blank.
/// </returns>
private static JiboInteractionDecision BuildWordOfTheDayGuessDecision(IReadOnlyDictionary<string, string> clientEntities)
{
    const string IntentName = "word_of_the_day_guess";

    // No usable guess entity: acknowledge the turn without echoing a word.
    if (!clientEntities.TryGetValue("guess", out var heardWord) || string.IsNullOrWhiteSpace(heardWord))
    {
        return new JiboInteractionDecision(IntentName, "I heard your word of the day guess.");
    }

    return new JiboInteractionDecision(IntentName, $"I heard {heardWord}.");
}
|
||||
|
||||
private static bool IsYesNoTurn(TurnContext turn)
|
||||
{
|
||||
return ReadRules(turn, "listenRules").Concat(ReadRules(turn, "clientRules"))
|
||||
@@ -204,6 +239,26 @@ public sealed class JiboInteractionService(
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads the <c>clientEntities</c> turn attribute into a case-insensitive name/value map.
/// </summary>
/// <param name="turn">Turn whose <c>Attributes</c> bag is inspected.</param>
/// <returns>
/// A map keyed with <see cref="StringComparer.OrdinalIgnoreCase"/>. It is empty when the attribute is
/// missing, null, or of a shape this method does not recognise.
/// </returns>
/// <remarks>
/// Three shapes are recognised:
/// a JSON object (only string-valued properties are kept),
/// an <see cref="IReadOnlyDictionary{TKey,TValue}"/> of strings (copied as-is), and
/// an <see cref="IDictionary{TKey,TValue}"/> of objects (null values skipped, others converted with <c>ToString()</c>).
/// Entries are written with last-wins semantics, so names that repeat or differ only by case
/// cannot raise a duplicate-key <see cref="ArgumentException"/>.
/// </remarks>
private static IReadOnlyDictionary<string, string> ReadEntities(TurnContext turn)
{
    var entities = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

    if (!turn.Attributes.TryGetValue("clientEntities", out var value) || value is null)
    {
        return entities;
    }

    switch (value)
    {
        case JsonElement { ValueKind: JsonValueKind.Object } json:
            foreach (var property in json.EnumerateObject())
            {
                // Non-string entity values (numbers, nested objects, ...) are ignored.
                if (property.Value.ValueKind == JsonValueKind.String)
                {
                    entities[property.Name] = property.Value.GetString() ?? string.Empty;
                }
            }

            break;

        case IReadOnlyDictionary<string, string> typed:
            // Copy so lookups are case-insensitive regardless of the source dictionary's comparer.
            foreach (var pair in typed)
            {
                entities[pair.Key] = pair.Value;
            }

            break;

        case IDictionary<string, object?> dictionary:
            foreach (var pair in dictionary)
            {
                if (pair.Value is not null)
                {
                    entities[pair.Key] = pair.Value.ToString() ?? string.Empty;
                }
            }

            break;
    }

    return entities;
}
|
||||
|
||||
private static bool MatchesAny(string loweredTranscript, params string[] candidates)
|
||||
{
|
||||
return candidates.Any(candidate => loweredTranscript.Contains(candidate, StringComparison.Ordinal));
|
||||
|
||||
@@ -24,7 +24,10 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
var outboundIntent = string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(clientIntent)
|
||||
? clientIntent
|
||||
: plan.IntentName ?? "unknown";
|
||||
var outboundAsrText = isYesNoTurn && isYesNoIntent
|
||||
var nluGuess = ReadClientEntity(turn, "guess");
|
||||
var outboundAsrText = string.Equals(clientIntent, "guess", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(nluGuess)
|
||||
? nluGuess
|
||||
: isYesNoTurn && isYesNoIntent
|
||||
? transcript
|
||||
: string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(clientIntent)
|
||||
? clientIntent
|
||||
@@ -206,6 +209,26 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
: null;
|
||||
}
|
||||
|
||||
/// <summary>
/// Looks up a single named entity inside the <c>clientEntities</c> turn attribute.
/// </summary>
/// <param name="turn">Turn whose <c>Attributes</c> bag is inspected.</param>
/// <param name="entityName">Name of the entity to fetch.</param>
/// <returns>
/// The entity value as a string, or <c>null</c> when the attribute is missing or null, the entity is
/// not present, or (for a JSON payload) the property is not a JSON string.
/// </returns>
private static string? ReadClientEntity(TurnContext turn, string entityName)
{
    if (!turn.Attributes.TryGetValue("clientEntities", out var raw) || raw is null)
    {
        return null;
    }

    // JSON payload: only a string-valued property counts as a hit.
    if (raw is JsonElement { ValueKind: JsonValueKind.Object } json
        && json.TryGetProperty(entityName, out var property)
        && property.ValueKind == JsonValueKind.String)
    {
        return property.GetString();
    }

    // Already-typed string map.
    if (raw is IReadOnlyDictionary<string, string> typed && typed.TryGetValue(entityName, out var text))
    {
        return text;
    }

    // Loosely-typed map: stringify whatever is stored (a stored null yields null).
    if (raw is IDictionary<string, object?> loose && loose.TryGetValue(entityName, out var boxed))
    {
        return boxed?.ToString();
    }

    return null;
}
|
||||
|
||||
private static object BuildSkillPayload(ResponsePlan plan, TurnContext turn, string transId, SpeakAction speak, InvokeNativeSkillAction? skill)
|
||||
{
|
||||
var skillPayload = skill?.Payload;
|
||||
|
||||
@@ -490,7 +490,16 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
|
||||
private static bool IsTranscriptUsable(TurnContext turn)
|
||||
{
|
||||
var messageType = ReadMessageType(turn);
|
||||
var clientIntent = ReadAttribute(turn, "clientIntent");
|
||||
var transcript = NormalizeTranscript(turn.NormalizedTranscript ?? turn.RawTranscript);
|
||||
|
||||
if (string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) &&
|
||||
!string.IsNullOrWhiteSpace(clientIntent))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(transcript))
|
||||
{
|
||||
return false;
|
||||
@@ -546,4 +555,16 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
.Replace(" ", " ", StringComparison.Ordinal)
|
||||
.Trim();
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns the turn's <c>messageType</c> attribute as a string, or <c>null</c> when it is not set.
/// </summary>
/// <param name="turn">Turn whose attributes are inspected.</param>
private static string? ReadMessageType(TurnContext turn) => ReadAttribute(turn, "messageType");
|
||||
|
||||
/// <summary>
/// Reads one entry from the turn's attribute bag and converts it to a string.
/// </summary>
/// <param name="turn">Turn whose <c>Attributes</c> bag is inspected.</param>
/// <param name="key">Attribute name to look up.</param>
/// <returns>
/// The stored value's <c>ToString()</c> result, or <c>null</c> when the key is absent or the stored value is null.
/// </returns>
private static string? ReadAttribute(TurnContext turn, string key)
{
    if (turn.Attributes.TryGetValue(key, out var stored))
    {
        return stored?.ToString();
    }

    return null;
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
using Jibo.Cloud.Application.Services;
|
||||
using Jibo.Cloud.Infrastructure.Content;
|
||||
using Jibo.Runtime.Abstractions;
|
||||
using System.Text.Json;
|
||||
|
||||
namespace Jibo.Cloud.Tests.WebSockets;
|
||||
|
||||
@@ -89,6 +90,27 @@ public sealed class JiboInteractionServiceTests
|
||||
Assert.Equal("joke", decision.IntentName);
|
||||
}
|
||||
|
||||
[Fact]
public async Task BuildDecisionAsync_WordOfDayGuess_UsesStructuredClientNluGuess()
{
    // Arrange: a turn carrying the structured guess intent, its puzzle rule, and a JSON entity payload.
    var sut = CreateService();
    var guessEntities = JsonDocument.Parse("""{"guess":"pastoral"}""").RootElement.Clone();
    var guessTurn = new TurnContext
    {
        RawTranscript = "guess",
        NormalizedTranscript = "guess",
        Attributes = new Dictionary<string, object?>
        {
            ["clientIntent"] = "guess",
            ["clientRules"] = new[] { "word-of-the-day/puzzle" },
            ["clientEntities"] = guessEntities
        }
    };

    // Act
    var outcome = await sut.BuildDecisionAsync(guessTurn);

    // Assert: the guess entity, not the transcript, drives both the intent and the reply.
    Assert.Equal("word_of_the_day_guess", outcome.IntentName);
    Assert.Equal("I heard pastoral.", outcome.ReplyText);
}
|
||||
|
||||
private static JiboInteractionService CreateService()
|
||||
{
|
||||
return new JiboInteractionService(
|
||||
|
||||
@@ -373,6 +373,38 @@ public sealed class JiboWebSocketServiceTests
|
||||
Assert.Equal("create/is_it_a_keeper", listenPayload.RootElement.GetProperty("data").GetProperty("match").GetProperty("rule").GetString());
|
||||
}
|
||||
|
||||
[Fact]
public async Task ClientNlu_WordOfDayGuess_UsesGuessEntityAsAsrTextAndCompletesTurn()
{
    // Arrange: the LISTEN frame that opens the puzzle turn, then the structured CLIENT_NLU guess for the same transID.
    var listenFrame = """{"type":"LISTEN","transID":"trans-wod-guess","data":{"rules":["word-of-the-day/puzzle","globals/gui_nav"],"asr":{"hints":["pastoral","doodad","escarpment"],"earlyEOS":["pastoral","doodad","escarpment"]}}}""";
    var clientNluFrame = """{"type":"CLIENT_NLU","transID":"trans-wod-guess","data":{"entities":{"guess":"pastoral"},"intent":"guess","rules":["word-of-the-day/puzzle"]}}""";

    // Act
    await _service.HandleMessageAsync(HubFrame(listenFrame));
    var replies = await _service.HandleMessageAsync(HubFrame(clientNluFrame));

    // Assert: the turn completes with a synthetic LISTEN followed by EOS.
    Assert.Equal(2, replies.Count);
    Assert.Equal("LISTEN", ReadReplyType(replies[0]));
    Assert.Equal("EOS", ReadReplyType(replies[1]));

    using var listenPayload = JsonDocument.Parse(replies[0].Text!);
    var data = listenPayload.RootElement.GetProperty("data");

    // The guessed word is surfaced as the ASR text and echoed in the NLU block.
    Assert.Equal("pastoral", data.GetProperty("asr").GetProperty("text").GetString());

    var nlu = data.GetProperty("nlu");
    Assert.Equal("guess", nlu.GetProperty("intent").GetString());
    Assert.Equal("pastoral", nlu.GetProperty("entities").GetProperty("guess").GetString());

    Assert.Equal("word-of-the-day/puzzle", data.GetProperty("match").GetProperty("rule").GetString());

    // Both frames share the same hub host, path, kind, and token; only the text differs.
    static WebSocketMessageEnvelope HubFrame(string text) => new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-wod-guess-token",
        Text = text
    };
}
|
||||
|
||||
[Fact]
|
||||
public async Task BufferedAudio_WithSyntheticTranscriptHint_FinalizesThroughSttSeam()
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user