Files
ChatbotAI/app/Services/Document/ChunkEditingService.php
2026-05-18 08:56:23 +08:00

219 lines
7.2 KiB
PHP

<?php
namespace App\Services\Document;
use App\Jobs\ReindexChunkJob;
use App\Models\ChunkAudit;
use App\Models\DocumentChunk;
use App\Services\KnowledgeBase\AuditService;
use App\Services\Qdrant\QdrantService;
use Illuminate\Support\Facades\DB;
use RuntimeException;
/**
 * ChunkEditingService
 *
 * Handles edit and status-toggle operations for a single chunk:
 * - Edit final_text
 * - Exclude a chunk from indexing
 * - Re-include a chunk in indexing
 * - Trigger a reindex without editing the text
 *
 * Each operation:
 * 1. Updates the MySQL record
 * 2. Syncs the status to Qdrant when needed
 * 3. Records a chunk_audits entry
 * 4. Logs to audit_logs
 * 5. Dispatches ReindexChunkJob when needed
 */
class ChunkEditingService
{
    public function __construct(
        private readonly QdrantService $qdrant,
        private readonly AuditService $audit,
    ) {}

    // =========================================================================
    // EDIT FINAL TEXT
    // =========================================================================

    /**
     * Edit the final_text of a chunk.
     *
     * The raw text (content) is never written by this method. After the edit
     * the chunk is flagged needs_reindex and a ReindexChunkJob is queued.
     *
     * @param DocumentChunk $chunk        Chunk to edit.
     * @param string        $newFinalText Replacement final_text.
     * @param string|null   $notes        Optional note stored with the chunk audit entry.
     *
     * @throws RuntimeException If the chunk cannot be edited (e.g. superseded)
     */
    public function editFinalText(
        DocumentChunk $chunk,
        string $newFinalText,
        ?string $notes = null
    ): void {
        if ($chunk->isSuperseded()) {
            throw new RuntimeException(
                'Chunk yang telah digantikan (superseded) tidak boleh diedit.'
            );
        }

        $oldFinalText = $chunk->final_text;
        $oldStatus = $chunk->chunk_status;

        // The "before" statistics fall back to the raw content when final_text
        // is null; the cast keeps the counters safe if both happen to be null.
        $textBefore = (string) ($oldFinalText ?? $chunk->content);

        $metadata = [
            'word_count_before' => $this->countWords($textBefore),
            'word_count_after' => $this->countWords($newFinalText),
            'char_count_before' => mb_strlen($textBefore),
            'char_count_after' => mb_strlen($newFinalText),
        ];

        // The chunk update and its audit row are committed (or rolled back) together.
        DB::transaction(function () use ($chunk, $newFinalText, $notes, $oldFinalText, $oldStatus, $metadata): void {
            $chunk->update([
                'final_text' => $newFinalText,
                'is_edited' => true,
                'chunk_status' => DocumentChunk::STATUS_NEEDS_REINDEX,
                'needs_reindex' => true,
                'edited_by' => auth()->id(),
                'edited_at' => now(),
            ]);

            ChunkAudit::record($chunk->id, ChunkAudit::OP_EDIT_FINAL_TEXT, [
                'old_final_text' => $oldFinalText,
                'new_final_text' => $newFinalText,
                'old_status' => $oldStatus,
                'new_status' => DocumentChunk::STATUS_NEEDS_REINDEX,
                'metadata' => $metadata,
            ], $notes);
        });

        $this->audit->chunkFinalTextEdited($chunk, $oldFinalText, $newFinalText);

        // Queue the reindex only after the transaction above has returned.
        ReindexChunkJob::dispatch($chunk->id);
    }

    // =========================================================================
    // EXCLUDE / INCLUDE
    // =========================================================================

    /**
     * Exclude a chunk from indexing.
     *
     * The MySQL state change is delegated to DocumentChunk::markAsExcluded()
     * (per the original notes: is_active = false, chunk_status = 'excluded').
     * If the chunk has a Qdrant point, its payload is marked inactive.
     * Calling this on an already-excluded chunk is a no-op.
     *
     * @param DocumentChunk $chunk Chunk to exclude.
     * @param string|null   $notes Optional note stored with the chunk audit entry.
     *
     * @throws RuntimeException If the chunk is superseded
     */
    public function excludeChunk(DocumentChunk $chunk, ?string $notes = null): void
    {
        if ($chunk->chunk_status === DocumentChunk::STATUS_EXCLUDED) {
            return; // Already excluded: nothing to do
        }

        if ($chunk->isSuperseded()) {
            throw new RuntimeException(
                'Chunk superseded tidak boleh di-exclude secara manual.'
            );
        }

        $oldStatus = $chunk->chunk_status;

        DB::transaction(function () use ($chunk, $notes, $oldStatus): void {
            $chunk->markAsExcluded();

            // Deactivate the Qdrant point if one exists. The call runs inside the
            // transaction, so an exception thrown here rolls back the MySQL change.
            if ($chunk->qdrant_point_id) {
                $this->qdrant->updatePayload($chunk->qdrant_point_id, [
                    'is_active' => false,
                    'status' => 'excluded',
                ]);
            }

            ChunkAudit::record($chunk->id, ChunkAudit::OP_EXCLUDE, [
                'old_status' => $oldStatus,
                'new_status' => DocumentChunk::STATUS_EXCLUDED,
            ], $notes);
        });

        $this->audit->chunkExcluded($chunk, $oldStatus);
    }

    /**
     * Return a chunk to indexing.
     *
     * The MySQL state change is delegated to DocumentChunk::markAsIncluded()
     * (per the original notes: is_active = true, exclude_from_index = false).
     * - If the chunk has a Qdrant point and is embedded, the point payload is reactivated.
     * - If the reloaded chunk reports needs_reindex, a ReindexChunkJob is queued.
     * Calling this on a chunk that is already active and not excluded is a no-op.
     *
     * @param DocumentChunk $chunk Chunk to re-include.
     * @param string|null   $notes Optional note stored with the chunk audit entry.
     *
     * @throws RuntimeException If the chunk is superseded (it cannot be re-included)
     */
    public function includeChunk(DocumentChunk $chunk, ?string $notes = null): void
    {
        if ($chunk->isSuperseded()) {
            throw new RuntimeException(
                'Chunk yang telah digantikan (superseded) tidak boleh dikembalikan. '
                . 'Gunakan child chunks yang dihasilkan dari split.'
            );
        }

        if (! $chunk->exclude_from_index && $chunk->is_active) {
            return; // Already active: nothing to do
        }

        $oldStatus = $chunk->chunk_status;

        $needsReindex = DB::transaction(function () use ($chunk, $notes, $oldStatus): bool {
            $chunk->markAsIncluded();

            // Reactivate the existing Qdrant point, if any. The call runs inside the
            // transaction, so an exception thrown here rolls back the MySQL change.
            if ($chunk->qdrant_point_id && $chunk->is_embedded) {
                $this->qdrant->updatePayload($chunk->qdrant_point_id, [
                    'is_active' => true,
                    'status' => 'active',
                ]);
            }

            // Reload once to read the persisted status and reindex flag. fresh()
            // returns null when the row no longer exists, so fall back to the
            // in-memory model instead of dereferencing null.
            $current = $chunk->fresh() ?? $chunk;

            ChunkAudit::record($chunk->id, ChunkAudit::OP_INCLUDE, [
                'old_status' => $oldStatus,
                'new_status' => $current->chunk_status,
            ], $notes);

            return (bool) $current->needs_reindex;
        });

        $this->audit->chunkIncluded($chunk, $oldStatus);

        // Queue a reindex when the persisted chunk is flagged as needing one.
        if ($needsReindex) {
            ReindexChunkJob::dispatch($chunk->id);
        }
    }

    // =========================================================================
    // TRIGGER REINDEX
    // =========================================================================

    /**
     * Flag a chunk as needing reindex and dispatch the job.
     *
     * Used by an admin to refresh the embedding without editing the text.
     *
     * @param DocumentChunk $chunk Chunk to reindex.
     * @param string|null   $notes Optional note stored with the chunk audit entry.
     *
     * @throws RuntimeException If the chunk is not indexable
     */
    public function triggerReindex(DocumentChunk $chunk, ?string $notes = null): void
    {
        if (! $chunk->isIndexable()) {
            throw new RuntimeException(
                'Chunk ini tidak boleh direindex (status: ' . $chunk->chunk_status . ').'
            );
        }

        $oldStatus = $chunk->chunk_status;

        // Keep the status change and its audit row atomic, matching the other
        // operations in this service.
        DB::transaction(function () use ($chunk, $notes, $oldStatus): void {
            $chunk->update([
                'chunk_status' => DocumentChunk::STATUS_NEEDS_REINDEX,
                'needs_reindex' => true,
            ]);

            ChunkAudit::record($chunk->id, ChunkAudit::OP_REINDEX, [
                'old_status' => $oldStatus,
                'new_status' => DocumentChunk::STATUS_NEEDS_REINDEX,
            ], $notes);
        });

        $this->audit->chunkReindexTriggered($chunk);

        ReindexChunkJob::dispatch($chunk->id);
    }

    // =========================================================================
    // HELPERS
    // =========================================================================

    /**
     * Count whitespace-separated tokens in UTF-8 text.
     *
     * Used for the audit statistics so that the word counts are multibyte-aware,
     * like the mb_strlen() character counts recorded next to them.
     */
    private function countWords(string $text): int
    {
        $tokens = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false when $text is not valid UTF-8; fall back to
        // the byte-oriented counter rather than failing the whole edit.
        return $tokens === false ? str_word_count($text) : count($tokens);
    }
}