Here i am going to explain one of the project realtime scenario how to read json content and compare csv to oracle table data.
scenario is:
When a user uploads a csv, xls, or text file using the DTU tool, the file is stored into an S3 bucket and a kafka message with the following information is generated in json format.
Dev team using spark, scala read this file and ingest the data into table
As a QA team, we need to read json, csv and table and compare the data using python.
Here file is in local folder and table is oracle
{
"file_name" : "G:\\ETL_Automation\\data\\dept.csv",
"file_type“ : "csv" ,
"table" : "dept"
}
================== Code here======================
import pandas as pd
import json
import cx_Oracle as ora
# Read json file
try:
file_json = r'G:\ETL_Automation\data\Json_file_Oracle.json'
with open(file_json, 'r') as file:
data = json.load(file)
# get file name, type and table name from json
file_name = data.get('file_name', '')
file_type = data.get('file_type', '')
tbl_name = data.get('table', '')
# read csv and if file type or file not present then throw error
try:
if file_type=="csv":
try:
df=pd.read_csv(file_name,delimiter=",",header=None)
data_list = [list(row) for row in df.values]
data_list.sort()
#print(data_list)
except FileNotFoundError:
print("File not found")
else:
print("File Extension is wrong")
except Exception as err:
print(err)
# read table, if table not found throw error
try:
conn= ora.connect('scott/tiger@localhost')
cursor= conn.cursor()
sql_query= "select * from %s" % tbl_name
cursor.execute(sql_query)
row = cursor.fetchall()
ora_df_list = []
for i in row:
ora_df_list.append(list(i))
ora_df_list.sort()
#print(ora_df_list)
except ora.Error as error:
print(error)
cursor.close()
conn.close()
# compare csv and table , if count not match throw error
try:
file_count= len(data_list)
tbl_count= len(ora_df_list)
if file_count== tbl_count:
if data_list==ora_df_list:
print("File, table count matching but data matching")
print("File count is ",file_count)
print("Table count is ",tbl_count)
else:
print("File, table count matching but data not matching")
print("File count is ",file_count)
print("Table count is ",tbl_count)
else:
print("File, table count not matching")
print("File count is ",file_count)
print("Table count is ",tbl_count)
except Exception as err:
print(err)
except FileNotFoundError:
print("File not found")
0 Comments
Thanks for your message.