219 lines
7.2 KiB
PHP
219 lines
7.2 KiB
PHP
<?php
|
|
|
|
namespace App\Services\Document;
|
|
|
|
use App\Jobs\ReindexChunkJob;
|
|
use App\Models\ChunkAudit;
|
|
use App\Models\DocumentChunk;
|
|
use App\Services\KnowledgeBase\AuditService;
|
|
use App\Services\Qdrant\QdrantService;
|
|
use Illuminate\Support\Facades\DB;
|
|
use RuntimeException;
|
|
|
|
/**
 * ChunkEditingService
 *
 * Handles edit and status-toggle operations for a single chunk:
 * - Edit final_text
 * - Exclude the chunk from indexing
 * - Include the chunk back into indexing
 *
 * Each operation:
 * 1. Updates the MySQL record
 * 2. Syncs the status to Qdrant if needed
 * 3. Records a chunk_audits entry
 * 4. Logs to audit_logs
 * 5. Dispatches ReindexChunkJob if needed
 */
|
|
class ChunkEditingService
{
    /**
     * @param QdrantService $qdrant Used to update the payload of a chunk's Qdrant point.
     * @param AuditService  $audit  Receives one audit call per successful operation.
     */
    public function __construct(
        private readonly QdrantService $qdrant,
        private readonly AuditService $audit,
    ) {}

    // =========================================================================
    // EDIT FINAL TEXT
    // =========================================================================

    /**
     * Replace the final_text of a chunk.
     *
     * The chunk's `content` attribute is not written by this method. After the
     * edit the chunk is flagged needs_reindex and a ReindexChunkJob is queued.
     *
     * NOTE(review): there is no guard for an already-excluded chunk here; an
     * excluded chunk that is edited gets chunk_status = needs_reindex and a
     * reindex job. Confirm this is intended.
     *
     * @param DocumentChunk $chunk        Chunk to edit.
     * @param string        $newFinalText Replacement text.
     * @param string|null   $notes        Optional note stored with the chunk audit record.
     *
     * @throws RuntimeException If the chunk is superseded.
     */
    public function editFinalText(
        DocumentChunk $chunk,
        string $newFinalText,
        ?string $notes = null
    ): void {
        if ($chunk->isSuperseded()) {
            throw new RuntimeException(
                'Chunk yang telah digantikan (superseded) tidak boleh diedit.'
            );
        }

        $oldFinalText = $chunk->final_text;
        $oldStatus = $chunk->chunk_status;

        // Baseline for the before/after metrics: the previous final_text, or
        // the chunk content when no final_text existed yet. Cast to string so
        // a null never reaches str_word_count()/mb_strlen() (deprecated on
        // PHP 8.1+), and so the fallback is evaluated only once.
        $textBefore = (string) ($oldFinalText ?? $chunk->content);

        DB::transaction(function () use ($chunk, $newFinalText, $notes, $oldFinalText, $oldStatus, $textBefore) {
            $chunk->update([
                'final_text' => $newFinalText,
                'is_edited' => true,
                'chunk_status' => DocumentChunk::STATUS_NEEDS_REINDEX,
                'needs_reindex' => true,
                'edited_by' => auth()->id(),
                'edited_at' => now(),
            ]);

            ChunkAudit::record($chunk->id, ChunkAudit::OP_EDIT_FINAL_TEXT, [
                'old_final_text' => $oldFinalText,
                'new_final_text' => $newFinalText,
                'old_status' => $oldStatus,
                'new_status' => DocumentChunk::STATUS_NEEDS_REINDEX,
                'metadata' => [
                    'word_count_before' => str_word_count($textBefore),
                    'word_count_after' => str_word_count($newFinalText),
                    'char_count_before' => mb_strlen($textBefore),
                    'char_count_after' => mb_strlen($newFinalText),
                ],
            ], $notes);
        });

        $this->audit->chunkFinalTextEdited($chunk, $oldFinalText, $newFinalText);

        // Queue the reindex only after the transaction closure has finished.
        ReindexChunkJob::dispatch($chunk->id);
    }

    // =========================================================================
    // EXCLUDE / INCLUDE
    // =========================================================================

    /**
     * Exclude a chunk from indexing.
     *
     * Calls DocumentChunk::markAsExcluded(), writes a chunk audit record with
     * new_status = STATUS_EXCLUDED and, when the chunk has a Qdrant point,
     * sets that point's payload to is_active = false / status = 'excluded'.
     * Does nothing if the chunk is already excluded.
     *
     * @param DocumentChunk $chunk Chunk to exclude.
     * @param string|null   $notes Optional note stored with the chunk audit record.
     *
     * @throws RuntimeException If the chunk is superseded.
     */
    public function excludeChunk(DocumentChunk $chunk, ?string $notes = null): void
    {
        if ($chunk->chunk_status === DocumentChunk::STATUS_EXCLUDED) {
            return; // Already excluded, nothing to do.
        }

        if ($chunk->isSuperseded()) {
            throw new RuntimeException(
                'Chunk superseded tidak boleh di-exclude secara manual.'
            );
        }

        $oldStatus = $chunk->chunk_status;

        DB::transaction(function () use ($chunk, $notes, $oldStatus) {
            $chunk->markAsExcluded();

            ChunkAudit::record($chunk->id, ChunkAudit::OP_EXCLUDE, [
                'old_status' => $oldStatus,
                'new_status' => DocumentChunk::STATUS_EXCLUDED,
            ], $notes);

            // The Qdrant update is the last step on purpose: it cannot be
            // rolled back, so every database write must have succeeded first.
            // If this call throws, the transaction still rolls back.
            if ($chunk->qdrant_point_id) {
                $this->qdrant->updatePayload($chunk->qdrant_point_id, [
                    'is_active' => false,
                    'status' => 'excluded',
                ]);
            }
        });

        $this->audit->chunkExcluded($chunk, $oldStatus);
    }

    /**
     * Bring a chunk back into indexing.
     *
     * Calls DocumentChunk::markAsIncluded(). If the chunk has a Qdrant point
     * and is embedded, the point payload is set to is_active = true /
     * status = 'active'. If the reloaded chunk has needs_reindex set, a
     * ReindexChunkJob is queued. Does nothing if the chunk is already active
     * and not excluded from the index.
     *
     * @param DocumentChunk $chunk Chunk to include.
     * @param string|null   $notes Optional note stored with the chunk audit record.
     *
     * @throws RuntimeException If the chunk is superseded.
     */
    public function includeChunk(DocumentChunk $chunk, ?string $notes = null): void
    {
        if ($chunk->isSuperseded()) {
            throw new RuntimeException(
                'Chunk yang telah digantikan (superseded) tidak boleh dikembalikan. '
                . 'Gunakan child chunks yang dihasilkan dari split.'
            );
        }

        if (! $chunk->exclude_from_index && $chunk->is_active) {
            return; // Already active, nothing to do.
        }

        $oldStatus = $chunk->chunk_status;

        $needsReindex = DB::transaction(function () use ($chunk, $notes, $oldStatus): bool {
            $chunk->markAsIncluded();

            // Reload once to read the persisted status and reindex flag.
            // fresh() can return null, so fall back to the in-memory model.
            $current = $chunk->fresh() ?? $chunk;

            ChunkAudit::record($chunk->id, ChunkAudit::OP_INCLUDE, [
                'old_status' => $oldStatus,
                'new_status' => $current->chunk_status,
            ], $notes);

            // The Qdrant update is the last step on purpose: it cannot be
            // rolled back, so every database write must have succeeded first.
            if ($chunk->qdrant_point_id && $chunk->is_embedded) {
                $this->qdrant->updatePayload($chunk->qdrant_point_id, [
                    'is_active' => true,
                    'status' => 'active',
                ]);
            }

            return (bool) $current->needs_reindex;
        });

        $this->audit->chunkIncluded($chunk, $oldStatus);

        // Queue a reindex when the persisted chunk is flagged as needing one.
        if ($needsReindex) {
            ReindexChunkJob::dispatch($chunk->id);
        }
    }

    // =========================================================================
    // TRIGGER REINDEX
    // =========================================================================

    /**
     * Flag a chunk as needing reindex and dispatch the job.
     *
     * Intended for admins who want to refresh the embedding without editing
     * the text.
     *
     * @param DocumentChunk $chunk Chunk to reindex.
     * @param string|null   $notes Optional note stored with the chunk audit record.
     *
     * @throws RuntimeException If DocumentChunk::isIndexable() returns false.
     */
    public function triggerReindex(DocumentChunk $chunk, ?string $notes = null): void
    {
        if (! $chunk->isIndexable()) {
            throw new RuntimeException(
                'Chunk ini tidak boleh direindex (status: ' . $chunk->chunk_status . ').'
            );
        }

        $oldStatus = $chunk->chunk_status;

        // The status change and its audit row are written atomically, as in
        // the other operations of this service.
        DB::transaction(function () use ($chunk, $notes, $oldStatus) {
            $chunk->update([
                'chunk_status' => DocumentChunk::STATUS_NEEDS_REINDEX,
                'needs_reindex' => true,
            ]);

            ChunkAudit::record($chunk->id, ChunkAudit::OP_REINDEX, [
                'old_status' => $oldStatus,
                'new_status' => DocumentChunk::STATUS_NEEDS_REINDEX,
            ], $notes);
        });

        $this->audit->chunkReindexTriggered($chunk);

        ReindexChunkJob::dispatch($chunk->id);
    }
}
|