diff --git a/bibendum/ingestions/bulk_process.py b/bibendum/ingestions/bulk_process.py
index 98c73e71a90276761f1a0462a29c6d93df68f43e..934ea5169febb4698583a5a47fa2e9c03ccf2eba 100644
--- a/bibendum/ingestions/bulk_process.py
+++ b/bibendum/ingestions/bulk_process.py
@@ -75,7 +75,7 @@ def bulk_insert_containers(cursor, notices, notice_cls):
cursor.execute(insert.from_temp(container_cls, *fields)) # Update non-existing containers
d = dict(cursor.execute(others.uuid_raw(container_cls)))
- cursor.execute(f"DROP TABLE temp_{cont_name}")
+ cursor.execute(f"DROP TABLE temp_{cont_name.value}")
# update duplicates to point toward the appropriate uuid
return d
@@ -84,7 +84,7 @@ def bulk_insert_notices(cursor, notices, containers_map, notice_cls):
fields.difference_update(("authors", "editors", "identifiers", "id", "advisors", "jury", "institutions"))
cont_id = False
if notice_cls.container_cls:
- cont_id = notice_cls.container_cls.get_document_type+"_id"
+ cont_id = notice_cls.container_cls.get_document_type.value+"_id"
fields.remove("container")
fields.add(cont_id)
def genargs(raw_uid, notice):
@@ -132,7 +132,7 @@ def bulk_insert_notices(cursor, notices, containers_map, notice_cls):
# update identifiers for olds notices
d = dict(cursor.execute(others.uuid_raw(notice_cls)))
- cursor.execute(f"DROP TABLE temp_{notice_cls.get_document_type}")
+ cursor.execute(f"DROP TABLE temp_{notice_cls.get_document_type.value}")
return d
diff --git a/bibendum/ingestions/queries/copy.py b/bibendum/ingestions/queries/copy.py
index 8becdcfcd76704984e151f8b10f94c0470088841..f22ab79a7da2a054d8a2dd1399a6e85772ae6339 100644
--- a/bibendum/ingestions/queries/copy.py
+++ b/bibendum/ingestions/queries/copy.py
@@ -4,10 +4,11 @@ from bibendum.ingestions.utils import get_tables_name
raw_notice = "COPY temp_raw_notice (notice, source, source_identifier) FROM STDIN"
person = "COPY temp_person (first_name, middle, last_name, targets) FROM STDIN"
def container_template(cont_name, *fields):
+ cont_name = cont_name.value
fields = ", ".join(fields) + ","
return f"COPY temp_{cont_name} ({fields} raws_id, new) FROM STDIN"
def notice_template(cls, *fields):
- name = cls.get_document_type
+ name = cls.get_document_type.value
fields = ", ".join(fields) + ","
return f"COPY temp_{name} ({fields} raws_id, identifiers, new) FROM STDIN"
diff --git a/bibendum/ingestions/queries/insert.py b/bibendum/ingestions/queries/insert.py
index 8884f2eb0b9db508e933717616c2e50333ded349..f3b710417a427d34033308070b130dd1aa54ab07 100644
--- a/bibendum/ingestions/queries/insert.py
+++ b/bibendum/ingestions/queries/insert.py
@@ -8,7 +8,7 @@ INSERT INTO raw_notice(notice, source, source_identifier, processed)
"""
def from_temp(cls, *fields):
- name = cls.get_document_type
+ name = cls.get_document_type.value
fields = ",".join(fields)
return f"""
INSERT INTO {name} (id, {fields})
@@ -19,7 +19,7 @@ INSERT INTO {name} (id, {fields})
"""
def notice_identifier_new(cls):
- name = cls.get_document_type
+ name = cls.get_document_type.value
return f"""
INSERT INTO notice_identifier(notice_id, identifiers)
SELECT id, identifiers FROM
@@ -29,7 +29,7 @@ INSERT INTO notice_identifier(notice_id, identifiers)
"""
def notice_raws_new(cls):
- name = cls.get_document_type
+ name = cls.get_document_type.value
return f"""
INSERT INTO notice_to_raw (notice_id, raws_id)
SELECT id, raws_id FROM
diff --git a/bibendum/ingestions/queries/others.py b/bibendum/ingestions/queries/others.py
index 6db7791d2415c38879be6d58715c47d0b3a65947..cee625ba92f44934665538aeb48bbe07c371381c 100644
--- a/bibendum/ingestions/queries/others.py
+++ b/bibendum/ingestions/queries/others.py
@@ -3,6 +3,6 @@ ingested_raw_unsplit = "SELECT id, notice FROM raw_notice WHERE processed='Inges
def uuid_raw(cls):
- name = cls.get_document_type
+ name = cls.get_document_type.value
return f"""
SELECT raw_id, id FROM temp_{name}, UNNEST(raws_id) as raw_id"""
diff --git a/bibendum/ingestions/queries/temporary_tables.py b/bibendum/ingestions/queries/temporary_tables.py
index 3991a44368fa226ddc5cef752bda5ebbd863a2f2..d0417dd50382744b64e7a75768bc0c254d1d6ebe 100644
--- a/bibendum/ingestions/queries/temporary_tables.py
+++ b/bibendum/ingestions/queries/temporary_tables.py
@@ -6,7 +6,7 @@ raw_notice = "CREATE TEMPORARY TABLE temp_raw_notice AS SELECT * FROM raw_notice
doctype_to_class = notice.Notice.doctype_to_class
def create_cont_temp(cls, *fields):
- name = cls.get_document_type
+ name = cls.get_document_type.value
fields = "\n " + ",\n ".join(fields)+","
return f"""
CREATE TEMPORARY TABLE temp_{name} AS
@@ -19,7 +19,7 @@ CREATE TEMPORARY TABLE temp_{name} AS
"""
def create_not_temp(cls, *fields):
- name = cls.get_document_type
+ name = cls.get_document_type.value
fields = "\n " + ",\n ".join(fields)+","
return f"""
CREATE TEMPORARY TABLE temp_{name} AS
diff --git a/bibendum/ingestions/queries/update.py b/bibendum/ingestions/queries/update.py
index 072f3a234348efa365e520f4544341ac19cefb55..2ae6fb89921962f711a5d3c852322418f263eade 100644
--- a/bibendum/ingestions/queries/update.py
+++ b/bibendum/ingestions/queries/update.py
@@ -3,7 +3,7 @@ flag_notices_success = "UPDATE raw_notice SET processed='Processed'::RAW_STATUS
def from_temporary(cls):
- name = cls.get_document_type
+ name = cls.get_document_type.value
ifields = ", ".join(f"i.{e}" for e in cls.unique_keys)
tfields = ", ".join(f"t.{e}" for e in cls.unique_keys)
return f"""
@@ -16,7 +16,7 @@ UPDATE temp_{name} as t
"""
def temporary_uuid(cls, cont_id=False):
- name = cls.get_document_type
+ name = cls.get_document_type.value
fields = list(cls.unique_keys)
if cont_id:
idx = fields.index("container")
@@ -51,7 +51,7 @@ UPDATE temp_person
"""
def notice_identifier_old(cls):
- name = cls.get_document_type
+ name = cls.get_document_type.value
return f"""
UPDATE notice_identifier as i
SET
@@ -71,7 +71,7 @@ UPDATE notice_identifier as i
"""
def notice_raw_old(cls):
- name = cls.get_document_type
+ name = cls.get_document_type.value
return f"""
UPDATE notice_to_raw as i
SET
diff --git a/bibendum/sql.py b/bibendum/sql.py
index 5bb007a4bff828159df77a4c9673fe1458c177fc..5559c7c7c70677f8637a7ee33d73760c9dd3a5d4 100644
--- a/bibendum/sql.py
+++ b/bibendum/sql.py
@@ -37,7 +37,7 @@ SELECT base.*, raws_id FROM
tbl_name = document_type.value
join = f"""
{tbl_name} INNER JOIN notice_identifier ON {tbl_name}.id = notice_identifier.notice_id
- INNER JOIN notice_to_raw ON {tbl_name}.id = notice_to_raw.notice_id
+ INNER JOIN notice_to_raw ON {tbl_name}.id = notice_to_raw.notice_id INNER JOIN raw_notice ON raw_notice.id = ANY(notice_to_raw.raws_id)
"""
cont_data = ""
if notice.container_cls:
@@ -45,10 +45,10 @@ SELECT base.*, raws_id FROM
join = f"{join} LEFT JOIN {cnt_name} ON {tbl_name}.{cnt_name}_id = {cnt_name}.id"
cont_data = f", row_to_json({cnt_name}.*) as container"
return f"""
-SELECT {tbl_name}.*,p.authors{cont_data}, to_json(notice_identifier.identifiers) as identifiers, notice_to_raw.raws_id
+SELECT {tbl_name}.*,p.authors{cont_data}, to_json(notice_identifier.identifiers) as identifiers, ROW(raw_notice.*)
FROM {join} LEFT JOIN
(
- SELECT target_id, json_agg(ROW(T.id, T.first_name, T.middle, T.last_name)::person) as authors
+ SELECT target_id, json_agg(ROW(T.id, T.first_name, T.middle, T.last_name)::person) as authors
FROM
(SELECT target_id, person.*
FROM
diff --git a/update_ingested_script.py b/update_ingested_script.py
index 8d15f6bbf1535637fdc805e455897114eacf2d17..a68b8a7029f5eaca5cd86b15723fbc0107786233 100644
--- a/update_ingested_script.py
+++ b/update_ingested_script.py
@@ -2,9 +2,9 @@ import sys
from bibendum.models import NoticeSource
import bibendum.ingestions as ing
from multiprocessing import Process
-size = 50000
-procnb_crossref = 5
-procnb_hal = 2
+size = 200000
+procnb_crossref = 1
+procnb_hal = 1
def ingest(source):
ing.bulk_process.process_raw_notices_unsplit(dict(dbname="bibendum"), source, size=size)