First commit
This commit is contained in:
171
app/Jobs/ReindexChunkJob.php
Normal file
171
app/Jobs/ReindexChunkJob.php
Normal file
@@ -0,0 +1,171 @@
|
||||
<?php
|
||||
|
||||
namespace App\Jobs;
|
||||
|
||||
use App\Models\DocumentChunk;
|
||||
use App\Models\ProcessingLog;
|
||||
use App\Services\Ollama\OllamaService;
|
||||
use App\Services\Qdrant\QdrantService;
|
||||
use Illuminate\Contracts\Queue\ShouldQueue;
|
||||
use Illuminate\Foundation\Queue\Queueable;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
use Illuminate\Support\Str;
|
||||
use RuntimeException;
|
||||
|
||||
/**
 * ReindexChunkJob
 *
 * Re-indexes a single chunk only:
 *   1. Re-embed getEmbeddableText() (final_text > cleaned_text > content)
 *   2. Upsert the point into Qdrant (reusing the old qdrant_point_id if there is one)
 *   3. Update the chunk: markAsEmbedded(), needs_reindex=false
 *
 * This differs from ReindexDocumentJob, which re-indexes the WHOLE document.
 * This job is used for:
 *   - final_text edited by an admin
 *   - Re-including a chunk that was previously excluded
 *   - Manually triggered reindex
 *   - Child chunks produced by a split (they need to be embedded for the first time)
 */
class ReindexChunkJob implements ShouldQueue
{
    use Queueable;

    /** Number of attempts before the job is considered failed. */
    public int $tries = 3;

    /** Wait 30 seconds before retrying. */
    public int $backoff = 30;

    /** At most 2 minutes per chunk. */
    public int $timeout = 120;

    /**
     * @param int $chunkId Primary key of the DocumentChunk to re-index.
     */
    public function __construct(
        public readonly int $chunkId,
    ) {
        $this->onQueue(config('knowledgebase.queue.ingestion', 'default'));
    }

    /**
     * Embed the chunk's text and upsert the resulting vector into Qdrant.
     *
     * The job returns without error (no retry) when the chunk is missing,
     * superseded, excluded from the index, or has no text to embed. Any failure
     * during embedding / upsert / bookkeeping is recorded and rethrown so the
     * queue can retry the job.
     *
     * @throws \Throwable Rethrows whatever the embed / upsert / update steps threw.
     */
    public function handle(OllamaService $ollama, QdrantService $qdrant): void
    {
        // Load the chunk together with the relations toQdrantPayload() needs.
        $chunk = DocumentChunk::with(['document.category', 'documentVersion'])
            ->find($this->chunkId);

        if (! $chunk) {
            Log::warning("ReindexChunkJob: Chunk #{$this->chunkId} tidak dijumpai. Job dilangkau.");

            return;
        }

        // ── Guard: do not reindex chunks that should not be indexed ────────

        if ($chunk->isSuperseded()) {
            Log::info("ReindexChunkJob: Chunk #{$this->chunkId} adalah superseded. Job dilangkau.", [
                'chunk_index' => $chunk->chunk_index,
            ]);

            return;
        }

        if ($chunk->exclude_from_index) {
            Log::info("ReindexChunkJob: Chunk #{$this->chunkId} dikecualikan dari index. Job dilangkau.", [
                'chunk_index' => $chunk->chunk_index,
                'chunk_status' => $chunk->chunk_status,
            ]);

            return;
        }

        // ── Fetch the text to embed ────────────────────────────────────────

        // Cast so a null result is treated as "no text" instead of reaching trim().
        $textToEmbed = (string) $chunk->getEmbeddableText();

        // Strict comparison on purpose: empty() would also reject the valid text "0".
        if (trim($textToEmbed) === '') {
            $chunk->update(['chunk_status' => DocumentChunk::STATUS_FAILED_EMBEDDING]);

            Log::error("ReindexChunkJob: Chunk #{$this->chunkId} mempunyai teks kosong.", [
                'has_final_text' => ! is_null($chunk->final_text),
                'has_cleaned_text' => ! is_null($chunk->cleaned_text),
                // content may be null here; mb_strlen() must not receive null.
                'content_length' => mb_strlen((string) $chunk->content),
            ]);

            return;
        }

        // ── Log: processing started ────────────────────────────────────────

        ProcessingLog::record(
            DocumentChunk::class,
            $chunk->id,
            ProcessingLog::STAGE_EMBED,
            ProcessingLog::STATUS_STARTED,
            null,
            [
                'chunk_index' => $chunk->chunk_index,
                'text_source' => $chunk->final_text ? 'final_text'
                    : ($chunk->cleaned_text ? 'cleaned_text' : 'content'),
                'text_length' => mb_strlen($textToEmbed),
                'is_reindex' => true,
            ]
        );

        try {
            // ── Embed the text ─────────────────────────────────────────────
            $vector = $ollama->embed($textToEmbed);

            // ── Decide the point ID ────────────────────────────────────────
            // Reuse the old qdrant_point_id when there is one → the upsert overwrites it.
            // This avoids "ghost points" that no chunk refers to any more.
            $pointId = $chunk->qdrant_point_id ?? (string) Str::uuid();

            // ── Upsert into Qdrant ─────────────────────────────────────────
            $qdrant->ensureCollectionExists();
            $qdrant->upsertPoint($pointId, $vector, $chunk->toQdrantPayload());

            // ── Update the chunk ───────────────────────────────────────────
            $chunk->markAsEmbedded($pointId);

            ProcessingLog::record(
                DocumentChunk::class,
                $chunk->id,
                ProcessingLog::STAGE_QDRANT,
                ProcessingLog::STATUS_COMPLETED,
                null,
                [
                    'point_id' => $pointId,
                    'is_reindex' => true,
                ]
            );

            Log::info("ReindexChunkJob: Chunk #{$this->chunkId} berjaya direindex.", [
                'chunk_index' => $chunk->chunk_index,
                'point_id' => $pointId,
            ]);
        } catch (\Throwable $e) {
            // Catch every throwable (not only RuntimeException) so each STARTED log
            // entry gets a matching FAILED entry. The exception is rethrown unchanged.

            // Mark as failed — the job will be retried (according to $tries).
            $chunk->update(['chunk_status' => DocumentChunk::STATUS_FAILED_EMBEDDING]);

            ProcessingLog::record(
                DocumentChunk::class,
                $chunk->id,
                ProcessingLog::STAGE_EMBED,
                ProcessingLog::STATUS_FAILED,
                $e->getMessage(),
                ['attempt' => $this->attempts()]
            );

            Log::error("ReindexChunkJob: Gagal reindex chunk #{$this->chunkId}.", [
                'error' => $e->getMessage(),
                'attempt' => $this->attempts(),
            ]);

            throw $e; // Allow retry
        }
    }

    /**
     * Called after all retries have been exhausted.
     *
     * Marks the chunk as failed with a direct query (no model is loaded) and
     * logs the final error.
     */
    public function failed(\Throwable $e): void
    {
        DocumentChunk::where('id', $this->chunkId)
            ->update(['chunk_status' => DocumentChunk::STATUS_FAILED_EMBEDDING]);

        Log::error("ReindexChunkJob: Chunk #{$this->chunkId} gagal selepas semua cubaan semula.", [
            'error' => $e->getMessage(),
        ]);
    }
}
|
||||
Reference in New Issue
Block a user