Wiki source code of FactHarbor POC1 Architecture Analysis 01.Jan.26
Version 10.1 by Robert Schaub on 2026/01/02 10:13
Show last authors
| author | version | line-number | content |
|---|---|---|---|
| 1 | = FactHarbor POC1 Architecture Analysis = | ||
| 2 | |||
| 3 | **Version:** 2.6.17 | ||
| 4 | **Analysis Date:** January 2026 | ||
| 5 | **Document Purpose:** Technical diagrams, gap analysis, and optimization recommendations | ||
| 6 | |||
| 7 | ---- | ||
| 8 | |||
| 9 | == 1. AKEL Flow Diagram (with LLM and WebSearch Interactions) == | ||
| 10 | |||
| 11 | |||
| 12 | {{mermaid}} | ||
| 13 | flowchart TB | ||
| 14 | subgraph Input["📥 Input Layer"] | ||
| 15 | URL[URL Input] | ||
| 16 | TEXT[Text Input] | ||
| 17 | end | ||
| 18 | |||
| 19 | subgraph Retrieval["🔍 Content Retrieval"] | ||
| 20 | FETCH[extractTextFromUrl] | ||
| 21 | PDF[PDF Parser<br/>pdf-parse v1] | ||
| 22 | HTML[HTML Parser<br/>cheerio] | ||
| 23 | end | ||
| 24 | |||
| 25 | subgraph AKEL["🧠 AKEL Pipeline"] | ||
| 26 | direction TB | ||
| 27 | |||
| 28 | subgraph Step1["Step 1: Understand"] | ||
| 29 | UNDERSTAND[understandClaim<br/>━━━━━━━━━━━━━<br/>• Detect input type<br/>• Extract claims<br/>• Identify dependencies<br/>• Assign risk tiers] | ||
| 30 | LLM1[("🤖 LLM Call #1<br/>Claude/GPT/Gemini")] | ||
| 31 | end | ||
| 32 | |||
| 33 | subgraph Step2["Step 2: Research (Iterative)"] | ||
| 34 | DECIDE[decideNextResearch<br/>━━━━━━━━━━━━━<br/>• Generate queries<br/>• Focus areas] | ||
| 35 | |||
| 36 | SEARCH[("🌐 Web Search<br/>Google CSE / SerpAPI")] | ||
| 37 | |||
| 38 | FETCHSRC[fetchSourceContent<br/>━━━━━━━━━━━━━<br/>• Parallel fetching<br/>• Timeout handling] | ||
| 39 | |||
| 40 | EXTRACT[extractFacts<br/>━━━━━━━━━━━━━<br/>• Parse sources<br/>• Extract facts] | ||
| 41 | LLM2[("🤖 LLM Call #2-N<br/>Per source")] | ||
| 42 | end | ||
| 43 | |||
| 44 | subgraph Step3["Step 3: Verdict Generation"] | ||
| 45 | VERDICT[generateVerdicts<br/>━━━━━━━━━━━━━<br/>• Claim verdicts<br/>• Article verdict<br/>• Dependency propagation] | ||
| 46 | LLM3[("🤖 LLM Call #N+1<br/>Final synthesis")] | ||
| 47 | end | ||
| 48 | |||
| 49 | subgraph Step4["Step 4: Report"] | ||
| 50 | REPORT[buildTwoPanelSummary<br/>━━━━━━━━━━━━━<br/>• Format results<br/>• Generate markdown] | ||
| 51 | end | ||
| 52 | end | ||
| 53 | |||
| 54 | subgraph Output["📤 Output"] | ||
| 55 | RESULT[AnalysisResult JSON] | ||
| 56 | MARKDOWN[Report Markdown] | ||
| 57 | end | ||
| 58 | |||
| 59 | %% Flow connections | ||
| 60 | URL --> FETCH | ||
| 61 | TEXT --> UNDERSTAND | ||
| 62 | FETCH --> PDF | ||
| 63 | FETCH --> HTML | ||
| 64 | PDF --> UNDERSTAND | ||
| 65 | HTML --> UNDERSTAND | ||
| 66 | |||
| 67 | UNDERSTAND --> LLM1 | ||
| 68 | LLM1 --> DECIDE | ||
| 69 | |||
| 70 | DECIDE --> SEARCH | ||
| 71 | SEARCH --> FETCHSRC | ||
| 72 | FETCHSRC --> EXTRACT | ||
| 73 | EXTRACT --> LLM2 | ||
| 74 | LLM2 --> DECIDE | ||
| 75 | |||
| 76 | DECIDE -->|"Research Complete"| VERDICT | ||
| 77 | VERDICT --> LLM3 | ||
| 78 | LLM3 --> REPORT | ||
| 79 | |||
| 80 | REPORT --> RESULT | ||
| 81 | REPORT --> MARKDOWN | ||
| 82 | |||
| 83 | %% Styling | ||
| 84 | classDef llm fill:#e1f5fe,stroke:#01579b,stroke-width:2px | ||
| 85 | classDef search fill:#fff3e0,stroke:#e65100,stroke-width:2px | ||
| 86 | classDef step fill:#f3e5f5,stroke:#4a148c,stroke-width:2px | ||
| 87 | |||
| 88 | class LLM1,LLM2,LLM3 llm | ||
| 89 | class SEARCH search | ||
| 90 | class UNDERSTAND,DECIDE,FETCHSRC,EXTRACT,VERDICT,REPORT step | ||
| 91 | {{/mermaid}} | ||
| 92 | |||
| 93 | ---- | ||
| 94 | |||
| 95 | == 2. ERD Data Model (Current POC1 Implementation) == | ||
| 96 | |||
| 97 | **Data Objects ERD** | ||
| 98 | |||
| 99 | {{mermaid}} | ||
| 100 | erDiagram | ||
| 101 | ARTICLE ||--o{ CLAIM : "contains" | ||
| 102 | ARTICLE ||--|| ARTICLE_VERDICT : "has" | ||
| 103 | CLAIM ||--|| CLAIM_VERDICT : "has" | ||
| 104 | CLAIM ||--o{ CLAIM : "depends on" | ||
| 105 | CLAIM_VERDICT }o--o{ EVIDENCE : "supported by" | ||
| 106 | SOURCE ||--o{ EVIDENCE : "provides" | ||
| 107 | ARTICLE ||--o{ SOURCE : "references" | ||
| 108 | |||
| 109 | ARTICLE { | ||
| 110 | string id PK "Unique identifier (job ID)" | ||
| 111 | string inputType "text | url" | ||
| 112 | string inputValue "Original URL or text" | ||
| 113 | string articleThesis "Main argument/thesis" | ||
| 114 | string detectedInputType "question | claim | article" | ||
| 115 | boolean isQuestion "True if input is a question" | ||
| 116 | datetime createdAt "Analysis timestamp" | ||
| 117 | datetime updatedAt "Last update" | ||
| 118 | json distinctProceedings "Legal proceedings if any" | ||
| 119 | boolean hasMultipleProceedings "Multi-proceeding flag" | ||
| 120 | string proceedingContext "Context for proceedings" | ||
| 121 | json logicalFallacies "Detected fallacies array" | ||
| 122 | boolean isPseudoscience "Pseudoscience detection" | ||
| 123 | string_array pseudoscienceCategories "Categories if detected" | ||
| 124 | int llmCalls "Total LLM API calls" | ||
| 125 | json searchQueries "All search queries performed" | ||
| 126 | string schemaVersion "e.g. 2.6.17" | ||
| 127 | } | ||
| 128 | |||
| 129 | CLAIM { | ||
| 130 | string id PK "SC1, SC2, C1, etc." | ||
| 131 | string articleId FK "Parent article" | ||
| 132 | string text "The claim statement" | ||
| 133 | string type "legal | procedural | factual | evaluative" | ||
| 134 | string claimRole "attribution | source | timing | core" | ||
| 135 | string_array dependsOn "IDs of prerequisite claims" | ||
| 136 | string_array keyEntities "Named entities in claim" | ||
| 137 | boolean isCentral "Is this a central claim?" | ||
| 138 | string relatedProceedingId "Linked proceeding if any" | ||
| 139 | int startOffset "Position in original text" | ||
| 140 | int endOffset "End position in original text" | ||
| 141 | string approximatePosition "Descriptive position" | ||
| 142 | } | ||
| 143 | |||
| 144 | CLAIM_VERDICT { | ||
| 145 | string id PK "Same as claim ID" | ||
| 146 | string claimId FK "Reference to claim" | ||
| 147 | string llmVerdict "WELL-SUPPORTED | PARTIALLY-SUPPORTED | UNCERTAIN | REFUTED" | ||
| 148 | string verdict "True | Mostly True | Leaning True | Unverified | Leaning False | Mostly False | False" | ||
| 149 | int confidence "0-100 LLM confidence" | ||
| 150 | int truthPercentage "0-100 calibrated truth score" | ||
| 151 | string riskTier "A (high) | B (medium) | C (low)" | ||
| 152 | string reasoning "Explanation of verdict" | ||
| 153 | string_array supportingFactIds "Evidence IDs supporting this" | ||
| 154 | boolean dependencyFailed "True if prerequisite failed" | ||
| 155 | string_array failedDependencies "Which deps failed" | ||
| 156 | string highlightColor "green | light-green | yellow | orange | dark-orange | red | dark-red" | ||
| 157 | boolean isPseudoscience "Pseudoscience flag" | ||
| 158 | string escalationReason "Why verdict was escalated" | ||
| 159 | } | ||
| 160 | |||
| 161 | ARTICLE_VERDICT { | ||
| 162 | string id PK "Same as article ID" | ||
| 163 | string articleId FK "Reference to article" | ||
| 164 | string llmArticleVerdict "Original LLM verdict" | ||
| 165 | int llmArticleConfidence "Original LLM confidence" | ||
| 166 | string articleVerdict "True | Mostly True | Leaning True | Unverified | Leaning False | Mostly False | False" | ||
| 167 | int articleTruthPercentage "0-100 calibrated score" | ||
| 168 | string articleVerdictReason "Why verdict differs from claims avg" | ||
| 169 | int claimsAverageTruthPercentage "Average of claim verdicts" | ||
| 170 | string claimsAverageVerdict "7-point average verdict" | ||
| 171 | int claimsTotal "Total claims analyzed" | ||
| 172 | int claimsSupported "Claims with truth >= 72%" | ||
| 173 | int claimsUncertain "Claims with truth 43-71%" | ||
| 174 | int claimsRefuted "Claims with truth < 43%" | ||
| 175 | int centralClaimsTotal "Number of central claims" | ||
| 176 | int centralClaimsSupported "Central claims supported" | ||
| 177 | } | ||
| 178 | |||
| 179 | EVIDENCE { | ||
| 180 | string id PK "S1-F1, S1-F2 format" | ||
| 181 | string sourceId FK "Reference to source" | ||
| 182 | string claimId FK "Optional: specific claim this supports" | ||
| 183 | string fact "The factual statement extracted" | ||
| 184 | string category "legal_provision | evidence | expert_quote | statistic | event | criticism" | ||
| 185 | string specificity "high | medium" | ||
| 186 | string sourceExcerpt "Original text excerpt" | ||
| 187 | string relatedProceedingId "Linked proceeding if any" | ||
| 188 | boolean isContestedClaim "Is this a contested assertion" | ||
| 189 | string claimSource "Who made contested claim" | ||
| 190 | } | ||
| 191 | |||
| 192 | SOURCE { | ||
| 193 | string id PK "S1, S2, etc." | ||
| 194 | string articleId FK "Parent article" | ||
| 195 | string url "Full URL" | ||
| 196 | string title "Page/document title" | ||
| 197 | string domain "Extracted domain" | ||
| 198 | int trackRecordScore "0-100 reliability score or null" | ||
| 199 | string fullText "Extracted content" | ||
| 200 | datetime fetchedAt "When content was fetched" | ||
| 201 | string category "news | academic | government | legal" | ||
| 202 | boolean fetchSuccess "True if fetch succeeded" | ||
| 203 | string searchQuery "Which query found this" | ||
| 204 | string mimeType "text/html | application/pdf" | ||
| 205 | } | ||
| 206 | {{/mermaid}} | ||
| 207 | |||
| 208 | **Data Usage ERD** | ||
| 209 | |||
| 210 | {{mermaid}} | ||
| 211 | erDiagram | ||
| 212 | JOB ||--o{ JOB_EVENT : "has" | ||
| 213 | JOB ||--|| ANALYSIS_RESULT : "produces" | ||
| 214 | ANALYSIS_RESULT ||--o{ CLAIM_VERDICT : "contains" | ||
| 215 | ANALYSIS_RESULT ||--o{ FETCHED_SOURCE : "references" | ||
| 216 | ANALYSIS_RESULT ||--o{ EXTRACTED_FACT : "contains" | ||
| 217 | CLAIM_VERDICT }o--o{ EXTRACTED_FACT : "supported by" | ||
| 218 | FETCHED_SOURCE ||--o{ EXTRACTED_FACT : "provides" | ||
| 219 | CLAIM_VERDICT ||--o{ CLAIM_VERDICT : "depends on" | ||
| 220 | |||
| 221 | JOB { | ||
| 222 | string JobId PK "GUID" | ||
| 223 | string Status "QUEUED|RUNNING|COMPLETE|FAILED" | ||
| 224 | int Progress "0-100" | ||
| 225 | datetime CreatedUtc | ||
| 226 | datetime UpdatedUtc | ||
| 227 | string InputType "text|url" | ||
| 228 | string InputValue "URL or text content" | ||
| 229 | string InputPreview "First 100 chars" | ||
| 230 | json ResultJson "Full analysis result" | ||
| 231 | string ReportMarkdown "Formatted report" | ||
| 232 | } | ||
| 233 | |||
| 234 | JOB_EVENT { | ||
| 235 | long Id PK | ||
| 236 | string JobId FK | ||
| 237 | datetime TsUtc | ||
| 238 | string Level "info|warn|error" | ||
| 239 | string Message | ||
| 240 | } | ||
| 241 | |||
| 242 | ANALYSIS_RESULT { | ||
| 243 | string schemaVersion "2.6.17" | ||
| 244 | string inputType "question|claim|article" | ||
| 245 | boolean isQuestion | ||
| 246 | string articleThesis | ||
| 247 | int articleTruthPercentage "0-100" | ||
| 248 | string articleVerdict "7-point scale" | ||
| 249 | json claimPattern "total/supported/uncertain/refuted" | ||
| 250 | boolean isPseudoscience | ||
| 251 | int llmCalls "Total LLM invocations" | ||
| 252 | json searchQueries "All search queries" | ||
| 253 | } | ||
| 254 | |||
| 255 | CLAIM_VERDICT { | ||
| 256 | string claimId PK "SC1, SC2, etc." | ||
| 257 | string claimText | ||
| 258 | boolean isCentral | ||
| 259 | string claimRole "attribution|source|timing|core" | ||
| 260 | string_array dependsOn "Prerequisite claim IDs" | ||
| 261 | boolean dependencyFailed | ||
| 262 | string llmVerdict "WELL-SUPPORTED|PARTIALLY-SUPPORTED|UNCERTAIN|REFUTED" | ||
| 263 | string verdict "7-point: True to False" | ||
| 264 | int confidence "0-100" | ||
| 265 | int truthPercentage "0-100" | ||
| 266 | string riskTier "A|B|C" | ||
| 267 | string reasoning | ||
| 268 | string_array supportingFactIds | ||
| 269 | string highlightColor "green to dark-red" | ||
| 270 | } | ||
| 271 | |||
| 272 | FETCHED_SOURCE { | ||
| 273 | string id PK "S1, S2, etc." | ||
| 274 | string url | ||
| 275 | string title | ||
| 276 | int trackRecordScore "0-100 or null" | ||
| 277 | string fullText "Extracted content" | ||
| 278 | datetime fetchedAt | ||
| 279 | string category "legal|news|academic" | ||
| 280 | boolean fetchSuccess | ||
| 281 | string searchQuery "Which query found this" | ||
| 282 | } | ||
| 283 | |||
| 284 | EXTRACTED_FACT { | ||
| 285 | string id PK "S1-F1, S1-F2, etc." | ||
| 286 | string fact "The factual statement" | ||
| 287 | string category "legal_provision|evidence|expert_quote|statistic|event|criticism" | ||
| 288 | string specificity "high|medium" | ||
| 289 | string sourceId FK | ||
| 290 | string sourceUrl | ||
| 291 | string sourceTitle | ||
| 292 | string sourceExcerpt | ||
| 293 | string relatedProceedingId | ||
| 294 | boolean isContestedClaim | ||
| 295 | string claimSource | ||
| 296 | } | ||
| 297 | {{/mermaid}} | ||
| 298 | |||
| 299 | ---- | ||
| 300 | |||
| 301 | == 3. Overall Architecture with Interactions == | ||
| 302 | |||
| 303 | {{mermaid}} | ||
| 304 | flowchart TB | ||
| 305 | subgraph Client["🖥️ Client Layer"] | ||
| 306 | BROWSER[Web Browser] | ||
| 307 | ANALYZE_PAGE["/analyze page<br/>React + TailwindCSS"] | ||
| 308 | JOBS_PAGE["/jobs page<br/>Job history & status"] | ||
| 309 | end | ||
| 310 | |||
| 311 | subgraph NextJS["⚡ Next.js Web App (apps/web)"] | ||
| 312 | direction TB | ||
| 313 | |||
| 314 | subgraph API_Routes["API Routes"] | ||
| 315 | ANALYZE_API["/api/fh/analyze<br/>━━━━━━━━━━━━━<br/>POST: Create job"] | ||
| 316 | JOBS_API["/api/fh/jobs<br/>━━━━━━━━━━━━━<br/>GET: List jobs<br/>POST: Create job"] | ||
| 317 | JOB_API["/api/fh/jobs/[id]<br/>━━━━━━━━━━━━━<br/>GET: Job status"] | ||
| 318 | EVENTS_API["/api/fh/jobs/[id]/events<br/>━━━━━━━━━━━━━<br/>GET: Job events (SSE)"] | ||
| 319 | RUN_JOB["/api/internal/run-job<br/>━━━━━━━━━━━━━<br/>POST: Execute analysis"] | ||
| 320 | end | ||
| 321 | |||
| 322 | subgraph Lib["Core Libraries"] | ||
| 323 | ANALYZER["analyzer.ts<br/>━━━━━━━━━━━━━<br/>AKEL Pipeline<br/>2918 lines"] | ||
| 324 | RETRIEVAL["retrieval.ts<br/>━━━━━━━━━━━━━<br/>URL content extraction"] | ||
| 325 | WEBSEARCH["web-search.ts<br/>━━━━━━━━━━━━━<br/>Search abstraction"] | ||
| 326 | MBFC["mbfc-loader.ts<br/>━━━━━━━━━━━━━<br/>Source reliability"] | ||
| 327 | end | ||
| 328 | end | ||
| 329 | |||
| 330 | subgraph DotNet["🔧 .NET API (apps/api)"] | ||
| 331 | DOTNET_API["FactHarbor.Api<br/>ASP.NET Core"] | ||
| 332 | |||
| 333 | subgraph Controllers["Controllers"] | ||
| 334 | ANALYZE_CTRL["AnalyzeController"] | ||
| 335 | JOBS_CTRL["JobsController"] | ||
| 336 | INTERNAL_CTRL["InternalJobsController"] | ||
| 337 | end | ||
| 338 | |||
| 339 | subgraph Services["Services"] | ||
| 340 | JOB_SVC["JobService<br/>━━━━━━━━━━━━━<br/>Job CRUD operations"] | ||
| 341 | RUNNER_CLIENT["RunnerClient<br/>━━━━━━━━━━━━━<br/>Calls Next.js runner"] | ||
| 342 | end | ||
| 343 | |||
| 344 | DB[(SQLite Database<br/>━━━━━━━━━━━━━<br/>JobEntity<br/>JobEventEntity)] | ||
| 345 | end | ||
| 346 | |||
| 347 | subgraph External["🌐 External Services"] | ||
| 348 | LLM_PROVIDERS["LLM Providers<br/>━━━━━━━━━━━━━<br/>• Anthropic Claude<br/>• OpenAI GPT<br/>• Google Gemini<br/>• Mistral"] | ||
| 349 | SEARCH_PROVIDERS["Search Providers<br/>━━━━━━━━━━━━━<br/>• Google CSE<br/>• SerpAPI<br/>• Brave<br/>• Tavily"] | ||
| 350 | WEB["Web Content<br/>━━━━━━━━━━━━━<br/>• News sites<br/>• PDFs<br/>• Academic sources"] | ||
| 351 | end | ||
| 352 | |||
| 353 | %% Client interactions | ||
| 354 | BROWSER --> ANALYZE_PAGE | ||
| 355 | BROWSER --> JOBS_PAGE | ||
| 356 | ANALYZE_PAGE --> ANALYZE_API | ||
| 357 | JOBS_PAGE --> JOBS_API | ||
| 358 | |||
| 359 | %% Next.js internal | ||
| 360 | ANALYZE_API --> JOBS_API | ||
| 361 | JOBS_API -->|"Proxy"| DOTNET_API | ||
| 362 | JOB_API -->|"Proxy"| DOTNET_API | ||
| 363 | EVENTS_API -->|"Proxy"| DOTNET_API | ||
| 364 | |||
| 365 | %% .NET flow | ||
| 366 | DOTNET_API --> ANALYZE_CTRL | ||
| 367 | DOTNET_API --> JOBS_CTRL | ||
| 368 | DOTNET_API --> INTERNAL_CTRL | ||
| 369 | ANALYZE_CTRL --> JOB_SVC | ||
| 370 | JOBS_CTRL --> JOB_SVC | ||
| 371 | JOB_SVC --> DB | ||
| 372 | JOB_SVC --> RUNNER_CLIENT | ||
| 373 | RUNNER_CLIENT -->|"HTTP POST"| RUN_JOB | ||
| 374 | |||
| 375 | %% Analysis execution | ||
| 376 | RUN_JOB --> ANALYZER | ||
| 377 | ANALYZER --> RETRIEVAL | ||
| 378 | ANALYZER --> WEBSEARCH | ||
| 379 | ANALYZER --> MBFC | ||
| 380 | |||
| 381 | %% External calls | ||
| 382 | ANALYZER -->|"AI SDK"| LLM_PROVIDERS | ||
| 383 | WEBSEARCH --> SEARCH_PROVIDERS | ||
| 384 | RETRIEVAL --> WEB | ||
| 385 | |||
| 386 | %% Styling | ||
| 387 | classDef external fill:#fff3e0,stroke:#e65100,stroke-width:2px | ||
| 388 | classDef core fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px | ||
| 389 | classDef api fill:#e3f2fd,stroke:#1565c0,stroke-width:2px | ||
| 390 | |||
| 391 | class LLM_PROVIDERS,SEARCH_PROVIDERS,WEB external | ||
| 392 | class ANALYZER,RETRIEVAL,WEBSEARCH,MBFC core | ||
| 393 | class ANALYZE_API,JOBS_API,JOB_API,EVENTS_API,RUN_JOB api | ||
| 394 | {{/mermaid}} | ||
| 395 | |||
| 396 | ---- | ||
| 397 | |||
| 398 | == 4. Specification vs Implementation Gap Analysis == | ||
| 399 | |||
| 400 | === 4.1 Data Model Gaps === | ||
| 401 | |||
| 402 | | Specification Entity | POC1 Status | Gap Description | | ||
| 403 | |-|-|-| | ||
| 404 | | **Claim** | ⚠️ Partial | No persistent storage; claims exist only in JSON result. Missing: `status`, `confidence_score`, `risk_score`, `completeness_score`, `version`, `views`, `edit_count` | | ||
| 405 | | **Evidence** | ⚠️ Partial | Implemented as `ExtractedFact` but lacks: `supports` enum, proper `relevance_score` | | ||
| 406 | | **Source** | ⚠️ Partial | `FetchedSource` exists but missing: `type` enum, `accuracy_history`, `correction_frequency`, weekly update scheduler | | ||
| 407 | | **Scenario** | ❌ Missing | Not implemented. Claims are evaluated directly without scenario contexts | | ||
| 408 | | **Verdict** | ⚠️ Partial | `ClaimVerdict` exists but missing: `likelihood_range`, `uncertainty_factors` array, proper `explanation_summary` | | ||
| 409 | | **User** | ❌ Missing | No user authentication or role system | | ||
| 410 | | **Edit** | ❌ Missing | No audit trail for changes | | ||
| 411 | |||
| 412 | === 4.2 AKEL Component Gaps === | ||
| 413 | |||
| 414 | | Spec Component | POC1 Status | Gap Description | | ||
| 415 | |-|-|-| | ||
| 416 | | **AKEL Orchestrator** | ✅ Implemented | `runAnalysis()` function serves this role | | ||
| 417 | | **Claim Extractor** | ✅ Implemented | `understandClaim()` with claim role/dependency tracking | | ||
| 418 | | **Claim Classifier** | ⚠️ Partial | Risk tier (A/B/C) assigned, but no domain classification | | ||
| 419 | | **Scenario Generator** | ❌ Missing | Claims evaluated without scenario extraction | | ||
| 420 | | **Evidence Summarizer** | ✅ Implemented | `extractFacts()` function | | ||
| 421 | | **Contradiction Detector** | ⚠️ Partial | `isContestedClaim` flag exists but no active contradiction search | | ||
| 422 | | **Quality Gate Validator** | ❌ Missing | No source quality gates, no mandatory checks | | ||
| 423 | | **Audit Sampling Scheduler** | ❌ Missing | No audit system | | ||
| 424 | | **Embedding Handler** | ❌ Missing | Not needed for POC | | ||
| 425 | | **Federation Sync** | ❌ Missing | Not needed for POC | | ||
| 426 | |||
| 427 | === 4.3 Architecture Gaps === | ||
| 428 | |||
| 429 | | Spec Requirement | POC1 Status | Gap Description | | ||
| 430 | |-|-|-| | ||
| 431 | | **Three-Layer Architecture** | ✅ Implemented | Interface (Next.js) → Processing (AKEL) → Data (SQLite) | | ||
| 432 | | **LLM Abstraction Layer** | ✅ Implemented | AI SDK supports multiple providers with failover | | ||
| 433 | | **PostgreSQL Primary DB** | ⚠️ Different | Using SQLite for simplicity (acceptable for POC) | | ||
| 434 | | **Redis Caching** | ❌ Missing | No caching layer | | ||
| 435 | | **S3 Archival** | ❌ Missing | No long-term storage | | ||
| 436 | | **Background Jobs** | ❌ Missing | No scheduler for source updates, cache warming | | ||
| 437 | | **Quality Monitoring** | ⚠️ Partial | LLM call counting exists, but no anomaly detection | | ||
| 438 | |||
| 439 | === 4.4 Publication & Review Gaps === | ||
| 440 | |||
| 441 | | Spec Feature | POC1 Status | Gap Description | | ||
| 442 | |-|-|-| | ||
| 443 | | **Risk Tier Publication Rules** | ❌ Missing | All results published immediately regardless of tier | | ||
| 444 | | **Human Review Queue** | ❌ Missing | No review workflow | | ||
| 445 | | **AI-Generated Labeling** | ⚠️ Partial | Results show "AI analysis" but no formal labeling system | | ||
| 446 | | **Audit Rate Sampling** | ❌ Missing | No sampling audits | | ||
| 447 | |||
| 448 | ---- | ||
| 449 | |||
| 450 | == 5. Optimization Recommendations == | ||
| 451 | |||
| 452 | === 5.1 Cost Optimizations === | ||
| 453 | |||
| 454 | {{mermaid}} | ||
| 455 | pie title Current LLM Cost Distribution (Estimated per Analysis) | ||
| 456 | "Step 1: Understand" : 15 | ||
| 457 | "Step 2: Research (per source)" : 60 | ||
| 458 | "Step 3: Verdicts" : 25 | ||
| 459 | {{/mermaid}} | ||
| 460 | |||
| 461 | | Optimization | Estimated Savings | Implementation Effort | | ||
| 462 | |-|-|-| | ||
| 463 | | **Cache claim understanding** | 30-50% on repeated claims | Medium | | ||
| 464 | | **Use Haiku for fact extraction** | 40% on Step 2 costs | Low (config change) | | ||
| 465 | | **Batch fact extraction** | 20% fewer API calls | Medium | | ||
| 466 | | **Skip search for known claims** | 50%+ for cached claims | High (needs claim DB) | | ||
| 467 | | **Reduce max iterations** | Linear reduction | Low (config change) | | ||
| 468 | |||
| 469 | === 5.2 Timing Optimizations === | ||
| 470 | |||
| 471 | {{mermaid}} | ||
| 472 | gantt | ||
| 473 | title Current Analysis Timeline (Typical) | ||
| 474 | dateFormat ss | ||
| 475 | axisFormat %S sec | ||
| 476 | |||
| 477 | section Current Flow | ||
| 478 | URL Fetch :a1, 00, 2s | ||
| 479 | Step 1 Understand :a2, after a1, 15s | ||
| 480 | Search Iteration 1 :a3, after a2, 8s | ||
| 481 | Fetch Sources 1 :a4, after a3, 10s | ||
| 482 | Extract Facts 1 :a5, after a4, 12s | ||
| 483 | Search Iteration 2 :a6, after a5, 8s | ||
| 484 | Fetch Sources 2 :a7, after a6, 10s | ||
| 485 | Extract Facts 2 :a8, after a7, 12s | ||
| 486 | Generate Verdicts :a9, after a8, 15s | ||
| 487 | |||
| 488 | section Optimized Flow | ||
| 489 | URL Fetch :b1, 00, 2s | ||
| 490 | Step 1 Understand :b2, after b1, 10s | ||
| 491 | Search + Fetch (parallel) :b3, after b2, 12s | ||
| 492 | Extract Facts (batched) :b4, after b3, 8s | ||
| 493 | Generate Verdicts :b5, after b4, 10s | ||
| 494 | {{/mermaid}} | ||
| 495 | |||
| 496 | | Optimization | Time Savings | Notes | | ||
| 497 | |-|-|-| | ||
| 498 | | **Parallel source fetching** | Already implemented | Currently fetches 3 sources in parallel | | ||
| 499 | | **Streaming LLM responses** | 20-30% perceived | User sees progress faster | | ||
| 500 | | **Search query batching** | 10-15% | Send multiple queries to search API | | ||
| 501 | | **Reduce prompt size** | 5-10% per call | Optimize system prompts | | ||
| 502 | | **Use faster models for extraction** | 30-40% on Step 2 | Claude Haiku vs Sonnet | | ||
| 503 | |||
| 504 | === 5.3 Priority Recommendations === | ||
| 505 | |||
| 506 | 1. **HIGH PRIORITY - Implement Claim Caching** | ||
| 507 | - Cache claim verdicts by content hash | ||
| 508 | - Reduces costs for repeated/similar claims | ||
| 509 | - Enables the separated verdict architecture (see Section 6) | ||
| 510 | |||
| 511 | 2. **MEDIUM PRIORITY - Use Tiered Models** | ||
| 512 | - Step 1 (Understand): Sonnet (needs reasoning) | ||
| 513 | - Step 2 (Extract): Haiku (simple extraction) | ||
| 514 | - Step 3 (Verdicts): Sonnet (needs synthesis) | ||
| 515 | |||
| 516 | 3. **LOW PRIORITY - Add Redis Cache** | ||
| 517 | - Cache source content (24h TTL) | ||
| 518 | - Cache search results (1h TTL) | ||
| 519 | - Reduces external API calls | ||
| 520 | |||
| 521 | ---- | ||
| 522 | |||
| 523 | == 6. Separated Verdict Architecture Proposal == | ||
| 524 | |||
| 525 | === 6.1 Current Architecture === | ||
| 526 | |||
| 527 | {{mermaid}} | ||
| 528 | flowchart LR | ||
| 529 | subgraph Current["Current: Monolithic Analysis"] | ||
| 530 | INPUT[Article Input] --> ANALYZE[Full Analysis Pipeline] | ||
| 531 | ANALYZE --> CLAIMS[Claim Verdicts] | ||
| 532 | ANALYZE --> ARTICLE[Article Verdict] | ||
| 533 | CLAIMS -.->|"Aggregated"| ARTICLE | ||
| 534 | end | ||
| 535 | {{/mermaid}} | ||
| 536 | |||
| 537 | **Issues:** | ||
| 538 | - Every analysis re-processes all claims | ||
| 539 | - No caching of individual claim verdicts | ||
| 540 | - Article verdict tightly coupled to claim extraction | ||
| 541 | |||
| 542 | === 6.2 Proposed Separated Architecture === | ||
| 543 | |||
| 544 | {{mermaid}} | ||
| 545 | flowchart TB | ||
| 546 | subgraph Input["Input Processing"] | ||
| 547 | ARTICLE[Article/Text Input] | ||
| 548 | EXTRACT[Claim Extraction] | ||
| 549 | end | ||
| 550 | |||
| 551 | subgraph ClaimLayer["Claim Verdict Layer (Cacheable)"] | ||
| 552 | CACHE[(Claim Cache<br/>━━━━━━━━━━━━━<br/>Key: claim_hash<br/>TTL: 7 days)] | ||
| 553 | |||
| 554 | CLAIM1["Claim 1 Analysis"] | ||
| 555 | CLAIM2["Claim 2 Analysis"] | ||
| 556 | CLAIM3["Claim N Analysis"] | ||
| 557 | |||
| 558 | VERDICT1[Claim 1 Verdict] | ||
| 559 | VERDICT2[Claim 2 Verdict] | ||
| 560 | VERDICT3[Claim N Verdict] | ||
| 561 | end | ||
| 562 | |||
| 563 | subgraph ArticleLayer["Article Verdict Layer (Dynamic)"] | ||
| 564 | AGGREGATE[Aggregate Claim Verdicts] | ||
| 565 | CONTEXT[Apply Article Context<br/>━━━━━━━━━━━━━<br/>• Claim relationships<br/>• Logical structure<br/>• Author intent] | ||
| 566 | ARTICLE_VERDICT[Article Verdict] | ||
| 567 | end | ||
| 568 | |||
| 569 | %% Flow | ||
| 570 | ARTICLE --> EXTRACT | ||
| 571 | EXTRACT --> CLAIM1 | ||
| 572 | EXTRACT --> CLAIM2 | ||
| 573 | EXTRACT --> CLAIM3 | ||
| 574 | |||
| 575 | CLAIM1 -->|"Cache Miss"| VERDICT1 | ||
| 576 | CLAIM2 -->|"Cache Hit"| VERDICT2 | ||
| 577 | CLAIM3 -->|"Cache Miss"| VERDICT3 | ||
| 578 | |||
| 579 | CLAIM1 <-.-> CACHE | ||
| 580 | CLAIM2 <-.-> CACHE | ||
| 581 | CLAIM3 <-.-> CACHE | ||
| 582 | |||
| 583 | VERDICT1 --> AGGREGATE | ||
| 584 | VERDICT2 --> AGGREGATE | ||
| 585 | VERDICT3 --> AGGREGATE | ||
| 586 | |||
| 587 | AGGREGATE --> CONTEXT | ||
| 588 | CONTEXT --> ARTICLE_VERDICT | ||
| 589 | |||
| 590 | classDef cache fill:#fff9c4,stroke:#f57f17,stroke-width:2px | ||
| 591 | classDef dynamic fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px | ||
| 592 | class CACHE cache | ||
| 593 | class CONTEXT,ARTICLE_VERDICT dynamic | ||
| 594 | {{/mermaid}} | ||
| 595 | |||
| 596 | === 6.3 Benefits Analysis === | ||
| 597 | |||
| 598 | | Benefit | Impact | Rationale | | ||
| 599 | |-|-|-| | ||
| 600 | | **Cost Reduction** | 40-70% for repeated claims | Many articles share common claims (e.g., "COVID vaccines are safe") | | ||
| 601 | | **Faster Analysis** | 50%+ for cached claims | Skip research + LLM calls for known claims | | ||
| 602 | | **Consistency** | High | Same claim always gets same verdict (until cache expires) | | ||
| 603 | | **Freshness Control** | Configurable TTL | Balance consistency vs. new evidence | | ||
| 604 | | **Scalability** | Linear improvement | More users = higher cache hit rate | | ||
| 605 | |||
| 606 | === 6.4 Implementation Considerations === | ||
| 607 | |||
| 608 | **Claim Hashing Strategy:** | ||
| 609 | {{code language="typescript"}}function getClaimHash(claim: string): string { | ||
| 610 | // Normalize: lowercase, remove punctuation, stem words | ||
| 611 | const normalized = normalize(claim); | ||
| 612 | // Hash for cache key | ||
| 613 | return crypto.createHash('sha256').update(normalized).digest('hex').slice(0, 16); | ||
| 614 | }{{/code}} | ||
| 615 | |||
| 616 | **Cache Invalidation Triggers:** | ||
| 617 | - TTL expiration (default 7 days) | ||
| 618 | - Major news event related to claim topic | ||
| 619 | - Significant change in a source's track record | ||
| 620 | - Manual invalidation by moderator | ||
| 621 | |||
| 622 | **Article Verdict Considerations:** | ||
| 623 | - Article verdict should ALWAYS be dynamic (never cached) | ||
| 624 | - Same claims in different article contexts may yield different article verdicts | ||
| 625 | - Example: "Vaccines are safe" + "Vaccines cause autism" → article may be misleading even if first claim is true | ||
| 626 | |||
| 627 | === 6.5 Recommendation === | ||
| 628 | |||
| 629 | **YES, separating claim verdicts from the article verdict is beneficial**, with the following caveats: | ||
| 630 | |||
| 631 | 1. **Claim verdicts should be cached** with semantic similarity matching (not just exact match) | ||
| 632 | 2. **Article verdicts should always be dynamic** to account for: | ||
| 633 | - Claim relationships and logical structure | ||
| 634 | - Author's argumentative strategy | ||
| 635 | - Context and framing | ||
| 636 | - Selective use of true claims to support false conclusions | ||
| 637 | |||
| 638 | 3. **Implementation phases:** | ||
| 639 | - Phase 1: Exact-match claim caching (simple hash) | ||
| 640 | - Phase 2: Semantic similarity caching (embedding-based) | ||
| 641 | - Phase 3: Federated claim sharing across instances | ||
| 642 | |||
| 643 | ---- | ||
| 644 | |||
| 645 | == 7. Summary == | ||
| 646 | |||
| 647 | === Current State === | ||
| 648 | |||
| 649 | - POC1 implements core AKEL pipeline successfully | ||
| 650 | - Claim dependency tracking is implemented | ||
| 651 | - Multiple LLM providers supported | ||
| 652 | - No persistent claim storage or caching | ||
| 653 | |||
| 654 | === Key Gaps from Specification === | ||
| 655 | |||
| 656 | - No scenario extraction | ||
| 657 | - No user/role system | ||
| 658 | - No audit trail | ||
| 659 | - No source track record updates | ||
| 660 | - No review queue | ||
| 661 | |||
| 662 | === Recommended Next Steps === | ||
| 663 | |||
| 664 | 1. Implement claim caching layer | ||
| 665 | 2. Separate claim vs article verdict generation | ||
| 666 | 3. Add Redis for source/search caching | ||
| 667 | 4. Implement tiered model selection | ||
| 668 | 5. Add basic audit logging |