Files
ChatbotAI/app/Services/KnowledgeBase/RAGService.php
2026-05-18 08:56:23 +08:00

250 lines
8.6 KiB
PHP

<?php
namespace App\Services\KnowledgeBase;
use App\Services\Ollama\OllamaService;
use App\Services\Qdrant\QdrantService;
use Illuminate\Support\Facades\Log;
use RuntimeException;
/**
 * RAGService (Retrieval-Augmented Generation)
 *
 * Coordinates the RAG pipeline:
 *   1. Generate an embedding for the user's question
 *   2. Retrieve the most relevant context from Qdrant
 *   3. Build the context string
 *   4. Send the question and context to Ollama for an answer
 *   5. Return the answer together with its source references
 */
class RAGService
{
    /** Maximum number of hits requested from the vector search. */
    private int $maxContextChunks;

    /** Word budget for the context string passed to the chat model. */
    private int $maxContextWords;

    public function __construct(
        private readonly OllamaService $ollama,
        private readonly QdrantService $qdrant,
    ) {
        $this->maxContextChunks = config('knowledgebase.rag.max_context_chunks', 5);
        $this->maxContextWords = config('knowledgebase.rag.max_context_words', 2000);
    }

    /**
     * Answer a question using RAG.
     *
     * @param string $question   The user's question
     * @param ?int   $categoryId Category filter (null = all categories)
     * @return array{
     *   answer: string,
     *   has_answer: bool,
     *   sources: array[],
     *   context_chunks: array[],
     *   model_used: string,
     *   tokens_used: ?int,
     *   response_time: float
     * }
     * @throws RuntimeException Not thrown directly here; expected to propagate from the
     *                          Ollama/Qdrant services when they are unavailable.
     */
    public function ask(string $question, ?int $categoryId = null): array
    {
        $startTime = microtime(true);

        // Step 1: generate the embedding for the question.
        $queryVector = $this->ollama->embed($question);

        // Step 2: look up relevant context in Qdrant (active entries only).
        $filter = $this->qdrant->buildFilter(
            categoryId: $categoryId,
            isActive: true,
        );
        $scoreThreshold = config('qdrant.search.score_threshold', 0.3);
        $searchResults = $this->qdrant->searchSimilar(
            vector: $queryVector,
            limit: $this->maxContextChunks,
            filter: $filter,
            scoreThreshold: $scoreThreshold,
        );

        // Use the imported facade rather than the global `\Log` alias, so this does not
        // depend on facade aliases being registered.
        Log::info('Qdrant search raw results', [
            'question' => $question,
            'results' => $searchResults,
        ]);
        Log::info('Qdrant raw results', [
            'scores' => array_map(static fn ($r) => $r['score'] ?? null, $searchResults),
        ]);

        if (empty($searchResults)) {
            return $this->noResultResponse($startTime);
        }

        // Step 3: build the context string.
        [$context, $contextChunksData] = $this->buildContext($searchResults);

        // Hits without any usable text give the model nothing to ground its answer on;
        // treat that exactly like "no hits" instead of querying with an empty context.
        if ($context === '') {
            return $this->noResultResponse($startTime);
        }

        // Step 4: send question + context to Ollama.
        $chatResult = $this->ollama->chat($question, $context);
        $answer = (string) ($chatResult['answer'] ?? '');

        // Step 5: build the source references.
        // NOTE(review): references are built from every hit, including hits that
        // buildContext() may have dropped because of the word budget.
        $sources = $this->buildSourceReferences($searchResults);

        $responseTime = round(microtime(true) - $startTime, 3);

        return [
            'answer' => $answer,
            // Whether the model actually produced an answer or a fallback phrase.
            'has_answer' => $this->detectHasAnswer($answer),
            'sources' => $sources,
            'context_chunks' => $contextChunksData,
            'model_used' => $chatResult['model'] ?? config('ollama.chat_model'),
            'tokens_used' => $chatResult['tokens'] ?? null,
            'response_time' => $responseTime,
        ];
    }

    /**
     * Build the fallback response used when no usable context was retrieved.
     *
     * @param float $startTime Value of microtime(true) captured when ask() started
     * @return array Same shape as the return value of ask()
     */
    private function noResultResponse(float $startTime): array
    {
        return [
            'answer' => config(
                'ollama.rag_system_prompt_no_result',
                'Maaf, saya tidak menemui maklumat berkaitan dalam pangkalan pengetahuan kami. ' .
                'Sila hubungi pejabat kami untuk maklumat lanjut.'
            ),
            'has_answer' => false,
            'sources' => [],
            'context_chunks' => [],
            'model_used' => config('ollama.chat_model'),
            'tokens_used' => null,
            'response_time' => round(microtime(true) - $startTime, 3),
        ];
    }

    /**
     * Build the context string from the search results.
     *
     * Chunks are appended in the order given until the word budget
     * ($maxContextWords) would be exceeded. The first usable chunk is always
     * included, even when it alone exceeds the budget, so the model never
     * receives an empty context while usable text exists.
     *
     * @param array[] $searchResults Hits; each may carry 'id', 'score' and 'payload'
     * @return array{0: string, 1: array[]} Context string and per-chunk log data
     */
    private function buildContext(array $searchResults): array
    {
        $contextParts = [];
        $chunksData = [];
        $totalWords = 0;

        foreach ($searchResults as $result) {
            $payload = $result['payload'] ?? [];
            $text = $payload['text'] ?? '';

            // Explicit check instead of empty(): the text "0" is valid content,
            // whereas whitespace-only text is not.
            if (!is_string($text) || trim($text) === '') {
                continue;
            }

            $words = $this->countWords($text);
            $exceedsBudget = $totalWords + $words > $this->maxContextWords;

            // Once something is already in the context, stop at the first chunk
            // that would push it over the budget.
            if ($exceedsBudget && $contextParts !== []) {
                break;
            }

            // Every chunk carries its source label, including an oversized first one.
            $source = $this->formatSourceLabel($payload);
            $contextParts[] = "[Sumber: {$source}]\n{$text}";
            $chunksData[] = $this->extractChunkData($result);
            $totalWords += $words;

            if ($exceedsBudget) {
                break;
            }
        }

        return [implode("\n\n---\n\n", $contextParts), $chunksData];
    }

    /**
     * Count whitespace-delimited tokens in a text.
     *
     * str_word_count() only recognises single-byte, locale-dependent letters, so
     * non-Latin text would count as zero words and digits would be ignored.
     * Scripts written without spaces still count as one token per run.
     */
    private function countWords(string $text): int
    {
        $count = preg_match_all('/\S+/u', $text);

        if ($count === false) {
            // The /u modifier fails on invalid UTF-8; fall back to byte-wise matching.
            $count = preg_match_all('/\S+/', $text);
        }

        return (int) $count;
    }

    /**
     * Build the list of source references shown to the user.
     *
     * Hits sharing the same document id, knowledge item id and page number are
     * reported once; the first occurrence wins.
     *
     * @param array[] $searchResults
     * @return array[]
     */
    private function buildSourceReferences(array $searchResults): array
    {
        $sources = [];
        $seen = []; // Guards against listing the same source twice.

        foreach ($searchResults as $result) {
            $payload = $result['payload'] ?? [];
            $sourceKey = ($payload['document_id'] ?? '') . '_' .
                ($payload['knowledge_item_id'] ?? '') . '_' .
                ($payload['page_number'] ?? '');

            if (isset($seen[$sourceKey])) {
                continue;
            }
            $seen[$sourceKey] = true;

            $sources[] = [
                'type' => $payload['source_type'] ?? 'unknown',
                'knowledge_type' => $payload['knowledge_type'] ?? '',
                'title' => $payload['title'] ?? 'Tiada tajuk',
                'category' => $payload['category_name'] ?? '',
                'category_id' => $payload['category_id'] ?? null,
                'page_number' => $payload['page_number'] ?? null,
                'section_heading' => $payload['section_heading'] ?? null,
                'document_id' => $payload['document_id'] ?? null,
                'knowledge_item_id' => $payload['knowledge_item_id'] ?? null,
                'score' => round($result['score'] ?? 0, 4),
            ];
        }

        return $sources;
    }

    /**
     * Extract the per-chunk data returned to the caller as `context_chunks`
     * (intended for storage in chat_logs).
     */
    private function extractChunkData(array $result): array
    {
        return [
            'point_id' => $result['id'] ?? null,
            'score' => round($result['score'] ?? 0, 4),
            'title' => $result['payload']['title'] ?? '',
            'category' => $result['payload']['category_name'] ?? '',
            'source_type' => $result['payload']['source_type'] ?? '',
            'page_number' => $result['payload']['page_number'] ?? null,
        ];
    }

    /**
     * Format the human-readable source label placed in front of a context chunk:
     * "<title>[, ms. <page>][ (<category>)]".
     */
    private function formatSourceLabel(array $payload): string
    {
        $title = $payload['title'] ?? 'Tanpa tajuk';
        $page = isset($payload['page_number']) ? ", ms. {$payload['page_number']}" : '';
        $category = (string) ($payload['category_name'] ?? '');

        // Leave the parentheses out entirely rather than emitting a dangling " ()".
        $categorySuffix = $category !== '' ? " ({$category})" : '';

        return "{$title}{$page}{$categorySuffix}";
    }

    /**
     * Detect whether the model actually produced an answer.
     *
     * Returns false for a blank answer or one containing any of the known
     * "don't know" / fallback phrases (matched case-insensitively).
     */
    private function detectHasAnswer(string $answer): bool
    {
        // Compare against '' rather than using empty(): the answer "0" is still an answer.
        if (trim($answer) === '') {
            return false;
        }

        $noAnswerPatterns = [
            'tidak menemui',
            'tiada maklumat',
            'tidak terdapat dalam',
            'sila hubungi',
            'tidak dapat menjawab',
            'maklumat tidak tersedia',
        ];
        $answerLower = mb_strtolower($answer);

        foreach ($noAnswerPatterns as $pattern) {
            if (str_contains($answerLower, $pattern)) {
                return false;
            }
        }

        return true;
    }
}