Last modified by Robert Schaub on 2026/02/08 08:12

From version 4.1
edited by Robert Schaub
on 2026/01/02 10:03
Change comment: There is no comment for this version
To version 9.1
edited by Robert Schaub
on 2026/01/02 10:13
Change comment: Rollback to version 7.1

Summary

Details

Page properties
Title
... ... @@ -1,1 +1,1 @@
1 -POC1 Architecture Analysis - 1.Jan.26
1 +FactHarbor POC1 Architecture Analysis
Content
... ... @@ -1,6 +1,5 @@
1 1  = FactHarbor POC1 Architecture Analysis =
2 2  
3 -
4 4  **Version:** 2.6.17
5 5  **Analysis Date:** January 2026
6 6  **Document Purpose:** Technical diagrams, gap analysis, and optimization recommendations
... ... @@ -93,12 +93,123 @@
93 93  
94 94  ----
95 95  
96 -
97 97  == 2. ERD Data Model (Current POC1 Implementation) ==
98 98  
97 +**Data Objects ERD**
99 99  
100 100  {{mermaid}}
101 101  erDiagram
101 + ARTICLE ||--o{ CLAIM : "contains"
102 + ARTICLE ||--|| ARTICLE_VERDICT : "has"
103 + CLAIM ||--|| CLAIM_VERDICT : "has"
104 + CLAIM ||--o{ CLAIM : "depends on"
105 + CLAIM_VERDICT }o--o{ EVIDENCE : "supported by"
106 + SOURCE ||--o{ EVIDENCE : "provides"
107 + ARTICLE ||--o{ SOURCE : "references"
108 +
109 + ARTICLE {
110 + string id PK "Unique identifier (job ID)"
111 + string inputType "text | url"
112 + string inputValue "Original URL or text"
113 + string articleThesis "Main argument/thesis"
114 + string detectedInputType "question | claim | article"
115 + boolean isQuestion "True if input is a question"
116 + datetime createdAt "Analysis timestamp"
117 + datetime updatedAt "Last update"
118 + json distinctProceedings "Legal proceedings if any"
119 + boolean hasMultipleProceedings "Multi-proceeding flag"
120 + string proceedingContext "Context for proceedings"
121 + json logicalFallacies "Detected fallacies array"
122 + boolean isPseudoscience "Pseudoscience detection"
123 + string_array pseudoscienceCategories "Categories if detected"
124 + int llmCalls "Total LLM API calls"
125 + json searchQueries "All search queries performed"
126 + string schemaVersion "e.g. 2.6.17"
127 + }
128 +
129 + CLAIM {
130 + string id PK "SC1, SC2, C1, etc."
131 + string articleId FK "Parent article"
132 + string text "The claim statement"
133 + string type "legal | procedural | factual | evaluative"
134 + string claimRole "attribution | source | timing | core"
135 + string_array dependsOn "IDs of prerequisite claims"
136 + string_array keyEntities "Named entities in claim"
137 + boolean isCentral "Is this a central claim?"
138 + string relatedProceedingId "Linked proceeding if any"
139 + int startOffset "Position in original text"
140 + int endOffset "End position in original text"
141 + string approximatePosition "Descriptive position"
142 + }
143 +
144 + CLAIM_VERDICT {
145 + string id PK "Same as claim ID"
146 + string claimId FK "Reference to claim"
147 + string llmVerdict "WELL-SUPPORTED | PARTIALLY-SUPPORTED | UNCERTAIN | REFUTED"
148 + string verdict "True | Mostly True | Leaning True | Unverified | Leaning False | Mostly False | False"
149 + int confidence "0-100 LLM confidence"
150 + int truthPercentage "0-100 calibrated truth score"
151 + string riskTier "A (high) | B (medium) | C (low)"
152 + string reasoning "Explanation of verdict"
153 + string_array supportingFactIds "Evidence IDs supporting this"
154 + boolean dependencyFailed "True if prerequisite failed"
155 + string_array failedDependencies "Which dependencies failed"
156 + string highlightColor "green | light-green | yellow | orange | dark-orange | red | dark-red"
157 + boolean isPseudoscience "Pseudoscience flag"
158 + string escalationReason "Why verdict was escalated"
159 + }
160 +
161 + ARTICLE_VERDICT {
162 + string id PK "Same as article ID"
163 + string articleId FK "Reference to article"
164 + string llmArticleVerdict "Original LLM verdict"
165 + int llmArticleConfidence "Original LLM confidence"
166 + string articleVerdict "True | Mostly True | Leaning True | Unverified | Leaning False | Mostly False | False"
167 + int articleTruthPercentage "0-100 calibrated score"
168 + string articleVerdictReason "Why verdict differs from claims average"
169 + int claimsAverageTruthPercentage "Average of claim verdicts"
170 + string claimsAverageVerdict "7-point average verdict"
171 + int claimsTotal "Total claims analyzed"
172 + int claimsSupported "Claims with truth >= 72%"
173 + int claimsUncertain "Claims with truth 43-71%"
174 + int claimsRefuted "Claims with truth < 43%"
175 + int centralClaimsTotal "Number of central claims"
176 + int centralClaimsSupported "Central claims supported"
177 + }
178 +
179 + EVIDENCE {
180 + string id PK "S1-F1, S1-F2 format"
181 + string sourceId FK "Reference to source"
182 + string claimId FK "Optional: specific claim this supports"
183 + string fact "The factual statement extracted"
184 + string category "legal_provision | evidence | expert_quote | statistic | event | criticism"
185 + string specificity "high | medium"
186 + string sourceExcerpt "Original text excerpt"
187 + string relatedProceedingId "Linked proceeding if any"
188 + boolean isContestedClaim "Is this a contested assertion"
189 + string claimSource "Who made contested claim"
190 + }
191 +
192 + SOURCE {
193 + string id PK "S1, S2, etc."
194 + string articleId FK "Parent article"
195 + string url "Full URL"
196 + string title "Page/document title"
197 + string domain "Extracted domain"
198 + int trackRecordScore "0-100 reliability score or null"
199 + string fullText "Extracted content"
200 + datetime fetchedAt "When content was fetched"
201 + string category "news | academic | government | legal"
202 + boolean fetchSuccess "True if fetch succeeded"
203 + string searchQuery "Which query found this"
204 + string mimeType "text/html | application/pdf"
205 + }
206 +{{/mermaid}}
207 +
208 +**Data Usage ERD**
209 +
210 +{{mermaid}}
211 +erDiagram
102 102   JOB ||--o{ JOB_EVENT : "has"
103 103   JOB ||--|| ANALYSIS_RESULT : "produces"
104 104   ANALYSIS_RESULT ||--o{ CLAIM_VERDICT : "contains"
... ... @@ -188,10 +188,8 @@
188 188  
189 189  ----
190 190  
191 -
192 192  == 3. Overall Architecture with Interactions ==
193 193  
194 -
195 195  {{mermaid}}
196 196  flowchart TB
197 197   subgraph Client["🖥️ Client Layer"]
... ... @@ -287,14 +287,10 @@
287 287  
288 288  ----
289 289  
290 -
291 291  == 4. Specification vs Implementation Gap Analysis ==
292 292  
293 -
294 -
295 295  === 4.1 Data Model Gaps ===
296 296  
297 -
298 298  | Specification Entity | POC1 Status | Gap Description |
299 299  |-|-|-|
300 300  | **Claim** | ⚠️ Partial | No persistent storage; claims exist only in JSON result. Missing: `status`, `confidence_score`, `risk_score`, `completeness_score`, `version`, `views`, `edit_count` |
... ... @@ -322,7 +322,6 @@
322 322  
323 323  === 4.3 Architecture Gaps ===
324 324  
325 -
326 326  | Spec Requirement | POC1 Status | Gap Description |
327 327  |-|-|-|
328 328  | **Three-Layer Architecture** | ✅ Implemented | Interface (Next.js) → Processing (AKEL) → Data (SQLite) |
... ... @@ -335,7 +335,6 @@
335 335  
336 336  === 4.4 Publication & Review Gaps ===
337 337  
338 -
339 339  | Spec Feature | POC1 Status | Gap Description |
340 340  |-|-|-|
341 341  | **Risk Tier Publication Rules** | ❌ Missing | All results published immediately regardless of tier |
... ... @@ -345,14 +345,10 @@
345 345  
346 346  ----
347 347  
348 -
349 349  == 5. Optimization Recommendations ==
350 350  
351 -
352 -
353 353  === 5.1 Cost Optimizations ===
354 354  
355 -
356 356  {{mermaid}}
357 357  pie title Current LLM Cost Distribution (Estimated per Analysis)
358 358   "Step 1: Understand" : 15
... ... @@ -370,7 +370,6 @@
370 370  
371 371  === 5.2 Timing Optimizations ===
372 372  
373 -
374 374  {{mermaid}}
375 375  gantt
376 376   title Current Analysis Timeline (Typical)
... ... @@ -406,7 +406,6 @@
406 406  
407 407  === 5.3 Priority Recommendations ===
408 408  
409 -
410 410  1. **HIGH PRIORITY - Implement Claim Caching**
411 411   - Cache claim verdicts by content hash
412 412   - Reduces costs for repeated/similar claims
... ... @@ -424,14 +424,10 @@
424 424  
425 425  ----
426 426  
427 -
428 428  == 6. Separated Verdict Architecture Proposal ==
429 429  
430 -
431 -
432 432  === 6.1 Current Architecture ===
433 433  
434 -
435 435  {{mermaid}}
436 436  flowchart LR
437 437   subgraph Current["Current: Monolithic Analysis"]
... ... @@ -447,10 +447,8 @@
447 447  - No caching of individual claim verdicts
448 448  - Article verdict tightly coupled to claim extraction
449 449  
450 -
451 451  === 6.2 Proposed Separated Architecture ===
452 452  
453 -
454 454  {{mermaid}}
455 455  flowchart TB
456 456   subgraph Input["Input Processing"]
... ... @@ -503,10 +503,8 @@
503 503   class CONTEXT,ARTICLE_VERDICT dynamic
504 504  {{/mermaid}}
505 505  
506 -
507 507  === 6.3 Benefits Analysis ===
508 508  
509 -
510 510  | Benefit | Impact | Rationale |
511 511  |-|-|-|
512 512  | **Cost Reduction** | 40-70% for repeated claims | Many articles share common claims (e.g., "COVID vaccines are safe") |
... ... @@ -554,11 +554,8 @@
554 554  
555 555  ----
556 556  
557 -
558 558  == 7. Summary ==
559 559  
560 -
561 -
562 562  === Current State ===
563 563  
564 564  - POC1 implements core AKEL pipeline successfully
... ... @@ -566,7 +566,6 @@
566 566  - Multiple LLM providers supported
567 567  - No persistent claim storage or caching
568 568  
569 -
570 570  === Key Gaps from Specification ===
571 571  
572 572  - No scenario extraction
... ... @@ -575,7 +575,6 @@
575 575  - No source track record updates
576 576  - No review queue
577 577  
578 -
579 579  === Recommended Next Steps ===
580 580  
581 581  1. Implement claim caching layer