import csv, os
import networkx as nx
import frovedis.graph as fnx
from networkx.algorithms import bipartite
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


                                                                # Data load.
# Read the comma-separated source file into a list of rows. The with-block
# closes the file even when parsing fails, and newline='' is the mode the csv
# module documents for files handed to csv.reader.
with open("graph_source_ecopol.csv", 'rt', newline='') as f:
    words = list(csv.reader(f, delimiter=','))

# Containers used by the preprocessing steps:
#   L         - first cell of a row -> list of the remaining cells of that row
#   g_L       - edge list, filled in by a later step
#   my_word   - flat word list, filled in by a later step
#   my_number - word numbers, filled in by a later step
L = {}
g_L = []
my_word = []
my_number = []

# Rows with fewer than two cells contribute nothing; when two rows share the
# same first cell, the later row replaces the earlier one.
for row in words:
    if len(row) > 1:
        L[row[0]] = row[1:]


                                                                # Spot-check L.
# Print every 5000th (key, values) entry of L, starting with the first one.
for count, (key, val) in enumerate(L.items()):
    if count % 5000 == 0:
        print(key, val)
# Leave the counter at the number of entries visited.
count = len(L)

# Flatten L into one word list: for every entry, its value words come first
# and the entry's own key follows them.
for head, related in L.items():
    my_word.extend(related)
    my_word.append(head)

# Drop repeated words while keeping first-appearance order: np.unique reports
# the position of each word's first occurrence, and sorting those positions
# restores the order in which the words were first seen.
_, first_seen = np.unique(my_word, return_index=True)
appearance_order = np.sort(first_seen)
buf = [my_word[pos] for pos in appearance_order]

# Number the unique words 1..len(buf) and tabulate them, indexed by number.
my_number.extend(range(1, len(buf) + 1))
wd_df = pd.DataFrame(
    {'number': my_number, 'word': buf},
    columns=['number', 'word'],
).set_index('number')

market ['stock market', 'financial market', 'violently', 'capitulation', 'market participant', 'pull-back', 'directionless', 'investor', 'axi', 'jj', 'rangebound']
premier ['liu he', 'li keqiang', 'shouwen', 'handshake', 'top-level', 'mar-a-lago']


                                                                # Word -> number lookup and edge list.
# Pair each assigned number with the word stored at the same position in buf.
word_dic = {buf[position]: number for position, number in enumerate(my_number)}

# One (key number, value-word number) pair for every value word of every
# entry in L.
g_L = [
    (word_dic[head], word_dic[related])
    for head, related_words in L.items()
    for related in related_words
]


                                                                # Start the Frovedis server.
from frovedis.exrpc.server import FrovedisServer

# The launch command is "mpirun -np 8 " followed by the value of the
# FROVEDIS_SERVER environment variable (a KeyError is raised if it is unset).
server_command = "mpirun -np 8 " + os.environ["FROVEDIS_SERVER"]
FrovedisServer.initialize(server_command)

'[ID: 1] FrovedisServer (Hostname: handson02, Port: 34905) has been initialized with 8 MPI processes.'


                                                                # Build the graphs.
# NetworkX graph (default Graph type) populated from the numbered edge pairs.
G = nx.from_edgelist(g_L)
# Frovedis graph constructed from the NetworkX graph.
fG = fnx.Graph(G)


                                                                # Neighbours of 'gdp'.
# Look up the node number of 'gdp' once and show it.
gdp_id = word_dic['gdp']
print(gdp_id)

# Words at distance 1 from 'gdp' according to Frovedis.
frovedis_ids = fnx.descendants_at_distance(fG, source=gdp_id, distance=1)
descendants = [wd_df.loc[node].to_numpy()[0] for node in frovedis_ids]
descendants_sorted = sorted(descendants)
print(descendants_sorted)

# Words adjacent to 'gdp' according to NetworkX.
neigi = [wd_df.loc[node].to_numpy()[0] for node in G.neighbors(gdp_id)]
neigi_sorted = sorted(neigi)
print(neigi_sorted)

# Both libraries should report the same set of words.
print(descendants_sorted == neigi_sorted)

172
['19-member', 'bea', 'cbo', 'cpb', 'debt/gdp', 'economy', 'growth', 'illustrative', 'nowcast', 'shallower']
['19-member', 'bea', 'cbo', 'cpb', 'debt/gdp', 'economy', 'growth', 'illustrative', 'nowcast', 'shallower']
True


                                                                # Connected components.
# Components reported by Frovedis, ordered from smallest to largest.
cc = sorted(fnx.connected_components(fG), key=len)

print("connected_components")
component_sizes = [len(list(component)) for component in cc]
print(component_sizes)

# Fraction of all nodes held by each component. The total is computed once
# instead of being re-summed for every component.
total_nodes = np.array(component_sizes).sum()
small_g = [size / total_nodes for size in component_sizes]

# Components holding less than 1% of the nodes.
cc_1 = [list(component) for component, share in zip(cc, small_g) if share < 0.01]

# Translate the node numbers of each small component back into words.
cc_2 = [
    [wd_df.loc[node].to_numpy()[0] for node in component]
    for component in cc_1
]
print("small size clusters")
print(cc_2)

connected_components
[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 17977]
small size clusters
[['inflow', 'flow'], ['garner', 'draw'], ['winner', 'loser'], ['floor', 'cashin'], ['consolidate', 'consolidation'], ['definition', 'define'], ['con', 'pro'], ['c-band', 'spectrum'], ['structure', 'variable'], ['breathe', 'sigh'], ['makeup', 'composition'], ['revert', 'revisit'], ['territory', 'uncharted'], ['concurrent', 'simultaneous'], ['serf', 'serve'], ['replacement', 'replace'], ['withdraw', 'withdrawal'], ['firmer', 'quote'], ['viacom', 'cbs'], ['fill', 'void'], ['hammond', 'carlson', 'horizon']]


                                                                # Distance-2 neighbourhoods.
# Sample every node whose number is a multiple of 100 and fetch its
# distance-2 node set from Frovedis exactly once; the same results feed both
# the histogram and the report below, so no query is repeated.
sampled = [
    (w, fnx.descendants_at_distance(fG, source=w, distance=2))
    for w in my_number
    if w % 100 == 0
]
dist = [len(found) for _, found in sampled]

# NOTE: bins is not passed to plt.hist below, which uses bins=100.
bins = range(1,1000)
fig = plt.figure()
ax1 = fig.add_subplot(111)
plt.hist(dist, bins=100)
ax1.set_xlabel('number of all nodes at a fixed distance from source in G.')
ax1.set_ylabel('Freq.')
plt.show()

# Report the sampled nodes whose distance-2 set has more than 3500 members.
for w, dist_list in sampled:
    if len(dist_list) > 3500:
        # Only the first 50 members are printed, so only those are translated
        # into words.
        preview = [wd_df.loc[e].to_numpy()[0] for e in list(dist_list)[:50]]
        print(wd_df.loc[w].to_numpy(), preview)

['intensification'] ['whitmer', 'metastasize', 'financial market', 'violently', 'capitulation', 'market participant', 'pull-back', 'directionless', 'investor', 'inertia', 'ever-increasing', 'rangebound', 'hyper', 'beijing', 'infancy', 'u.s.', 'dependency', 'counter-measures', 'cui', 'chinese goods', 'retaliate', 'country', 'phase-1', 'willems', 'countermeasure', 'tariff', 'gesture', 'guodu', 'chinese economy', 'barley', 'china', 'repriced', 'unhelpful', 'grape', 'virus', 'pandemic', 'outbreak', 'fast-spreading', 'epidemic', 'pneumonia-like', 'unison', 'flu-like', 'virus-related', 'virus-led', 'disease', 'infect', 'replenishment', 'virulent', 'chilly', 'pneumonia']
['stabilizer'] ['metastasize', 'inertia', 'ever-increasing', 'hyper', 'infancy', 'revalue', 'environmentally-friendly', 'monetisation', 'u.s.-china', '16-month-long', 'trade conflict', 'greenium', 'dependency', 'zurbruegg', 're-pricing', 'repriced', 'leer', 'unhelpful', 'adjunct', 'hillman', 'central', 'rrrs', 'sarb', 'pandemic', 'outbreak', 'unison', 'epidemic', 'replenishment', 'rapidly-spreading', 'maximise', 'virulent', 'intimate', 'pneumonia', 'continual', 'reappear', 'sickness', 'payoff', 'juicing', 'veracity', 'pathogen', 'vanilla', 'second-wave', 'country-wide', 'stickiness', 'convenient', 'devastation', 'fabian', 'deregulate', 'vocal', 'powell-led']
['unequivocal'] ['whitmer', 'metastasize', 'inertia', 'ever-increasing', 'fa', '16-month-long', 'zurbruegg', 'counter-measures', 'annette', 'cui', 'trichet', 'nadia', 'liu he', 're-pricing', 'angus', 'willems', 'emeritus', 'winston', 'nyu', 'leer', 'unhelpful', 'intellectual', 'adjunct', 'hillman', 'central', 'sarb', 'unison', 'cynthia', 'baldwin', 'rapidly-spreading', 'feinstein', 'rohit', 'intimate', 'continual', 'sickness', 'headlong', 'veracity', 'lecturer', 'stern', 'relapse', 'second-wave', 'elaine', 'tremor', 'six-member', 'monkey', 'devastation', 'fomc', 'jerome powell', 'fed chairman', 'mester']


                                                                # Shortest paths from 'gdp'.
# Ask Frovedis for the single-source shortest paths starting at 'gdp', then
# print how many entries the result yields when iterated.
gdp_paths = fnx.single_source_shortest_path(fG, word_dic['gdp'])
path = list(gdp_paths)
print(len(path))

17977


                                                                # PageRank.
# Score every node with Frovedis pagerank (alpha=0.9).
pr = fnx.pagerank(fG, alpha=0.9)

# Keep the words whose score is at least 0.0001.
pr_top = [
    wd_df.loc[node].to_numpy()[0]
    for node, score in pr.items()
    if score >= 0.0001
]
print("pagerank: \n",pr_top)
print(" \n\n")

pagerank: 
 ['guodu', 'barley', 'nervously', '1q', 'home-price', 'pced', 'hem', 'flatlining', 'lobster', 'tariff-related', '15-day', 'valneva', 'vaccine-related', 'no-touch', 'ointment', 'encouragingly', 'townswick', 'record-keeping', 'tourism-related', 'markdowns', 'high-contact', 'garthwaite', '10-years', 'back-to-school', 'stonks', 'newbie', 'heavily-shorted', 'mb/d', 'collateralised', 'cy20', 'drub', 'dovishness', 'decarbonisation', 'favourably', 'cryptos', 'rehired', 'fcf', 'undersupply', 'prime-age', 'market-leading', 'clamour', 'gbp', 'allocator', 'bandwagon', 'preeminent', 'cny', 'klieve', 'khajuria', 'matson', 'overproduction', 'hydroxychloroquine', 'run-rate', 'higher-growth', 'high-priced', '4q', 'homebuyers', 'semi', 'sawada', 'overweighting', 'meander', 'short-duration', 'breber', 'substitution', 'on-chain', 'nflx', 'wiser', 'absorption', 'byproduct', 'chehab', 'tidy', 'stickiness', 'wiederhorn', 'ricchiuti', 'faster-growing', 'alright', 'hubris', 'de-listing', 'pull-backs', 'usd/jpy', 'cta', 'allot', 'defensiveness', 'pugh', 'sterne', 'venkateswaran', 'paracuelles', 'roache', 'elise', 'gamaleya', 'kintor', 'pekao', 'siena', 'trillion-a-day', 'ako', 'tomas', 'ici', 'concurrently', 'codelco', 'burkina', 'harmony', 'scandal-plagued', 'third-', 'sharara', 'whistle', 'havana', 'comedy', 'comic', 'hong kong', 'shanxi', 'liao', 'kitao', 'knof', 'kxl', 'andrei', 'rosn.mm', 'maine', 'evacuation', 'monde', 'reorganisation', 'chinese-owned', 'rhode', 'saikawa', 'ky', 'faa', 'deripaska', 'keystone', 'ou', 'circus', 'manchester', 'jean-pierre', 'gpb', 'sheryl', 'renmin', 'video-sharing', 'bharat', 'jo', 'hmc', 'metallon', 'indaba', 'arnaud', 'canzonieri', 'ortega', 'businesswoman', 'christophe', 'kazakh', 'spalding', 'drake']


                                                                # Degree centrality table.
# (node number, degree centrality) pairs computed by NetworkX.
centrality = nx.degree_centrality(G)
tmp_list = list(centrality.items())
tmp_df = pd.DataFrame(tmp_list, columns=['number', 'degree'])

# Attach the word for each node number and list the highest degrees first.
wd_df_pr = tmp_df.merge(wd_df, on='number')
ranked = wd_df_pr.sort_values(by='degree', ascending=False)
print(ranked)

       number    degree           word
7769     7770  0.013874    inescapable
8615     8616  0.013874  preoccupation
16515   16516  0.013874  businesswoman
2157     2158  0.013874      priced-in
8527     8528  0.013874        overtly
...       ...       ...            ...
16569   16570  0.000055            sba
16568   16569  0.000055            mta
16567   16568  0.000055           alex
16565   16566  0.000055           oslo
18019   18020  0.000055        mengniu

[18020 rows x 3 columns]


                                                                # Degree centrality histogram.
# One figure holding a single axes.
fig, ax1 = plt.subplots()
# NOTE: bins is not passed to the histogram call below, which uses bins=100.
bins = range(1,1000)

# Distribution of the NetworkX degree-centrality values over 100 bins.
centrality_values = nx.degree_centrality(G).values()
ax1.hist(centrality_values, bins=100)
ax1.set_xlabel('The degree centrality values')
ax1.set_ylabel('Freq.')
plt.show()


                                                                # Network drawing.
# Lay the graph out with the spring layout and draw it with node labels.
pos = nx.spring_layout(G)
nx.draw_networkx(G, pos, with_labels=True, alpha=0.5)
plt.axis("off")
# Save before showing: once plt.show() returns, the drawn figure is no longer
# the current one, so a later savefig writes an empty canvas (the recorded
# output "<Figure size 432x288 with 0 Axes>" is that empty figure).
plt.savefig("network.jpg", dpi=1000)
plt.show()

<Figure size 432x288 with 0 Axes>


                                                                # Stop the Frovedis server.
# Shut down the server that was started with FrovedisServer.initialize.
FrovedisServer.shut_down()

BluStellar（ブルーステラ）

製品・ソリューション

業種・業務

企業情報

サイト内の現在位置

Frovedisによるグラフ解析

グラフ解析（グラフ理論）について

NetworkXとFrovedisによるグラフ解析

サンプルコードの処理内容

NetworkX互換グラフアルゴリズム：　¶

¶

使用するデータセット：　単語分割とStop words除去を実施した経済関連ニュース記事¶

データロード¶

グラフデータのための前処理：　全ての単語にユニークな番号を割り振り¶

グラフデータのための前処理：　各単語と割り振った番号による辞書化、ユニークな番号によるノード組合せ作成¶

Frovedisサーバ起動¶

NetworkXを使用しノード組合せを読み込み無向グラフ作成¶

Frovedis版無向グラフに変換¶

以下操作により同等な無向グラフデータがVHとVEのメモリに置かれる¶

Frovedis descendants_at_distanceとNetworkX neighborsでノード'gdp'とエッジで結ばれるノードを参照¶

Frovedis connected_componentsで1つ以上のエッジを経てつながっている全ノード数を表示¶

Frovedis descendants_at_distanceで指定する距離内のエッジにより結ばれるノード数ヒストグラム¶

大規模なクラスター（3500以上）のノード（単語）リストを表示¶

Frovedis single_source_shortest_pathによる指定したノードからスタートして最短距離で辿り着ける全てのノード数を表示¶

Frovedis pagerankによる多数のノードから接続されているノード（単語）をリストアップ¶

NetworkXのdegree_centralityによる中心性を確認と、ネットワークを視覚化¶

関連リンク

サイト内の現在位置

Frovedisによるグラフ解析

グラフ解析（グラフ理論）について

NetworkXとFrovedisによるグラフ解析

サンプルコードの処理内容

NetworkX互換グラフアルゴリズム： ¶

¶

使用するデータセット： 単語分割とStop words除去を実施した経済関連ニュース記事¶

データロード¶

グラフデータのための前処理： 全ての単語にユニークな番号を割り振り¶

グラフデータのための前処理： 各単語と割り振った番号による辞書化、ユニークな番号によるノード組合せ作成¶

Frovedisサーバ起動¶

NetworkXを使用しノード組合せを読み込み無向グラフ作成¶

Frovedis版無向グラフに変換¶

以下操作により同等な無向グラフデータがVHとVEのメモリに置かれる¶

Frovedis descendants_at_distanceとNetworkX neighborsでノード'gdp'とエッジで結ばれるノードを参照¶

Frovedis connected_componentsで1つ以上のエッジを経てつながっている全ノード数を表示¶

Frovedis descendants_at_distanceで指定する距離内のエッジにより結ばれるノード数ヒストグラム¶

大規模なクラスター（3500以上）のノード（単語）リストを表示¶

Frovedis single_source_shortest_pathによる指定したノードからスタートして最短距離で辿り着ける全てのノード数を表示¶

Frovedis pagerankによる多数のノードから接続されているノード（単語）をリストアップ¶

NetworkXのdegree_centralityによる中心性を確認と、ネットワークを視覚化¶

関連リンク

NetworkX互換グラフアルゴリズム：　¶

使用するデータセット：　単語分割とStop words除去を実施した経済関連ニュース記事¶

グラフデータのための前処理：　全ての単語にユニークな番号を割り振り¶

グラフデータのための前処理：　各単語と割り振った番号による辞書化、ユニークな番号によるノード組合せ作成¶